diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc
index 4c8f6b2ea7d3a09968bb7432a5796cc58a5f9c26..69bd1164343da743a24931d725e6e2490d350a2a 100644
--- a/llvm/lib/Support/Unix/Memory.inc
+++ b/llvm/lib/Support/Unix/Memory.inc
@@ -237,7 +237,8 @@ void Memory::InvalidateInstructionCache(const void *Addr, size_t Len) {
   for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
     asm volatile("icbi 0, %0" : : "r"(Line));
   asm volatile("isync");
-#elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \
+#elif (defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) || \
+       defined(__mips__)) && \
     defined(__GNUC__)
   // FIXME: Can we safely always call this for __GNUC__ everywhere?
   const char *Start = static_cast<const char *>(Addr);
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
index 51df0463e23524850cea69550a4822bdadebcc94..18a532b55ee5a924f13a9b3af5d3f514aea64563 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -153,18 +153,12 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
   Register ScratchReg = MI.getOperand(1).getReg();
   Register AddrReg = MI.getOperand(2).getReg();
   Register IncrReg = MI.getOperand(3).getReg();
-  AtomicOrdering Ordering =
-      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
 
   // .loop:
-  //   if(Ordering != AtomicOrdering::Monotonic)
-  //     dbar 0
   //   ll.[w|d] dest, (addr)
   //   binop scratch, dest, val
   //   sc.[w|d] scratch, scratch, (addr)
   //   beqz scratch, loop
-  if (Ordering != AtomicOrdering::Monotonic)
-    BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
   BuildMI(LoopMBB, DL,
           TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
       .addReg(AddrReg)
@@ -251,12 +245,8 @@ static void doMaskedAtomicBinOpExpansion(
   Register AddrReg = MI.getOperand(2).getReg();
   Register IncrReg = MI.getOperand(3).getReg();
   Register MaskReg = MI.getOperand(4).getReg();
-  AtomicOrdering Ordering =
-      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
 
   // .loop:
-  //   if(Ordering != AtomicOrdering::Monotonic)
-  //     dbar 0
   //   ll.w destreg, (alignedaddr)
   //   binop scratch, destreg, incr
   //   xor scratch, destreg, scratch
@@ -264,8 +254,6 @@ static void doMaskedAtomicBinOpExpansion(
   //   xor scratch, destreg, scratch
   //   sc.w scratch, scratch, (alignedaddr)
   //   beqz scratch, loop
-  if (Ordering != AtomicOrdering::Monotonic)
-    BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
   BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
       .addReg(AddrReg)
       .addImm(0);
@@ -372,23 +360,20 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
   auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
 
   // Insert new MBBs.
   MF->insert(++MBB.getIterator(), LoopHeadMBB);
   MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
   MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
-  MF->insert(++LoopTailMBB->getIterator(), TailMBB);
-  MF->insert(++TailMBB->getIterator(), DoneMBB);
+  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
 
   // Set up successors and transfer remaining instructions to DoneMBB.
   LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
   LoopHeadMBB->addSuccessor(LoopTailMBB);
   LoopIfBodyMBB->addSuccessor(LoopTailMBB);
   LoopTailMBB->addSuccessor(LoopHeadMBB);
-  LoopTailMBB->addSuccessor(TailMBB);
-  TailMBB->addSuccessor(DoneMBB);
+  LoopTailMBB->addSuccessor(DoneMBB);
   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
   DoneMBB->transferSuccessors(&MBB);
   MBB.addSuccessor(LoopHeadMBB);
@@ -402,11 +387,9 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
 
   //
   // .loophead:
-  //   dbar 0
   //   ll.w destreg, (alignedaddr)
   //   and scratch2, destreg, mask
   //   move scratch1, destreg
-  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
   BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
       .addReg(AddrReg)
       .addImm(0);
@@ -463,7 +446,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
   // .looptail:
   //   sc.w scratch1, scratch1, (addr)
   //   beqz scratch1, loop
-  //   dbar 0x700
   BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
       .addReg(Scratch1Reg)
      .addReg(AddrReg)
@@ -472,10 +454,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
       .addReg(Scratch1Reg)
       .addMBB(LoopHeadMBB);
 
-  // .tail:
-  //   dbar 0x700
-  BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
-
   NextMBBI = MBB.end();
   MI.eraseFromParent();
 
@@ -483,7 +461,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
   computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
   computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
   computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
-  computeAndAddLiveIns(LiveRegs, *TailMBB);
   computeAndAddLiveIns(LiveRegs, *DoneMBB);
 
   return true;
@@ -535,12 +512,10 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
         .addReg(CmpValReg)
         .addMBB(TailMBB);
     // .looptail:
-    //   dbar 0
     //   move scratch, newval
     //   sc.[w|d] scratch, scratch, (addr)
     //   beqz scratch, loophead
     //   b done
-    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
         .addReg(NewValReg)
         .addReg(LoongArch::R0);
@@ -573,13 +548,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
         .addMBB(TailMBB);
     // .looptail:
-    //   dbar 0
     //   andn scratch, dest, mask
     //   or scratch, scratch, newval
     //   sc.[w|d] scratch, scratch, (addr)
     //   beqz scratch, loophead
     //   b done
-    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg)
         .addReg(DestReg)
         .addReg(MaskReg);
@@ -598,9 +571,24 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
   }
 
+  AtomicOrdering FailureOrdering =
+      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
+  int hint;
+
+  switch (FailureOrdering) {
+  case AtomicOrdering::Acquire:
+  case AtomicOrdering::AcquireRelease:
+  case AtomicOrdering::SequentiallyConsistent:
+    // acquire
+    hint = 0b10100;
+    break;
+  default:
+    hint = 0x700;
+  }
+
   // .tail:
-  //   dbar 0x700
-  BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
+  //   dbar 0x700 | acquire
+  BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);
 
   NextMBBI = MBB.end();
   MI.eraseFromParent();
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 5affaf37ad5a19ebe3f9ec124986c319b171f647..4fc2b4709840bfb3ab9cfa1015d80597e6c0d7c5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -159,6 +159,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   // The MULO libcall is not part of libgcc, only compiler-rt.
   setLibcallName(RTLIB::MULO_I128, nullptr);
 
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
   static const ISD::CondCode FPCCToExpand[] = {
       ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
       ISD::SETGE,  ISD::SETNE,  ISD::SETGT};
@@ -366,6 +368,8 @@ bool LoongArchTargetLowering::isOffsetFoldingLegal(
 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                 SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
+  case ISD::ATOMIC_FENCE:
+    return lowerATOMIC_FENCE(Op, DAG);
   case ISD::EH_DWARF_CFA:
     return lowerEH_DWARF_CFA(Op, DAG);
   case ISD::GlobalAddress:
@@ -542,6 +546,22 @@ LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
   return SDValue();
 }
 
+SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SyncScope::ID FenceSSID =
+      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
+
+  // singlethread fences only synchronize with signal handlers on the same
+  // thread and thus only need to preserve instruction order, not actually
+  // enforce memory ordering.
+  if (FenceSSID == SyncScope::SingleThread)
+    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+    return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+
+  return Op;
+}
+
 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                      SelectionDAG &DAG) const {
@@ -4472,8 +4492,9 @@ LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
-  Value *Ordering =
-      Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord));
+  AtomicOrdering FailOrd = CI->getFailureOrdering();
+  Value *FailureOrdering =
+      Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
 
   // TODO: Support cmpxchg on LA32.
   Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
@@ -4484,7 +4505,7 @@ Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
   Function *MaskedCmpXchg =
       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
   Value *Result = Builder.CreateCall(
-      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
   Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
   return Result;
 }
@@ -4872,3 +4893,8 @@ bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
 
   return !isa<ConstantSDNode>(Y);
 }
+
+ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
+  // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
+  return ISD::SIGN_EXTEND;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 6b5a851ec55d01425b37965351b10b58bf2e84fc..2c9826a13237b4bb5992c78d8cbd5ed68f933632 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -203,6 +203,8 @@ public:
     return ISD::SIGN_EXTEND;
   }
 
+  ISD::NodeType getExtendForAtomicCmpSwapArg() const override;
+
   Register getRegisterByName(const char *RegName, LLT VT,
                              const MachineFunction &MF) const override;
   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
@@ -266,6 +268,7 @@ private:
   MachineBasicBlock *
   EmitInstrWithCustomInserter(MachineInstr &MI,
                               MachineBasicBlock *BB) const override;
+  SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 05ae36a9781d93b63277349dd24ebd596cf0b5e1..ab189055681498eedfbfc91879d1efd7606e332c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1590,7 +1590,29 @@ def : RegRegStPat;
 
 /// Atomic loads and stores
 
-def : Pat<(atomic_fence timm, timm), (DBAR 0)>;
+// DBAR hint encoding for LA664 and later micro-architectures, paraphrased from
+// the Linux patch revealing it [1]:
+//
+// - Bit 4: kind of constraint (0: completion, 1: ordering)
+// - Bit 3: barrier for previous read (0: true, 1: false)
+// - Bit 2: barrier for previous write (0: true, 1: false)
+// - Bit 1: barrier for succeeding read (0: true, 1: false)
+// - Bit 0: barrier for succeeding write (0: true, 1: false)
+//
+// Hint 0x700: barrier for "read after read" from the same address, which is
+// e.g. needed by LL-SC loops on older models. (DBAR 0x700 behaves the same as
+// nop if such reordering is disabled on supporting newer models.)
+//
+// [1]: https://lore.kernel.org/loongarch/20230516124536.535343-1-chenhuacai@loongson.cn/
+//
+// Implementations without support for the finer-granularity hints simply treat
+// all as the full barrier (DBAR 0), so we can unconditionally start emitting the
+// more precise hints right away.
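+//
+// For illustration, the AtomicOrdering immediates 4..7 matched below correspond
+// to the acquire/release/acq_rel/seq_cst IR fences, so the expected selection
+// is, e.g.:
+//
+//   fence acquire   ; dbar 20  (0b10100)
+//   fence release   ; dbar 18  (0b10010)
+//   fence acq_rel   ; dbar 16  (0b10000)
+//   fence seq_cst   ; dbar 16  (0b10000)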
+
+def : Pat<(atomic_fence 4, timm), (DBAR 0b10100)>; // acquire
+def : Pat<(atomic_fence 5, timm), (DBAR 0b10010)>; // release
+def : Pat<(atomic_fence 6, timm), (DBAR 0b10000)>; // acqrel
+def : Pat<(atomic_fence 7, timm), (DBAR 0b10000)>; // seqcst
 
 defm : LdPat;
 defm : LdPat;
@@ -1731,7 +1753,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
 
 class PseudoCmpXchg
     : Pseudo<(outs GPR:$res, GPR:$scratch),
-             (ins GPR:$addr, GPR:$cmpval, GPR:$newval)> {
+             (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$fail_order)> {
   let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
   let mayLoad = 1;
   let mayStore = 1;
@@ -1745,7 +1767,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg;
 def PseudoMaskedCmpXchg32
     : Pseudo<(outs GPR:$res, GPR:$scratch),
              (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
-                  grlenimm:$ordering)> {
+                  grlenimm:$fail_order)> {
   let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
   let mayLoad = 1;
   let mayStore = 1;
@@ -1763,6 +1785,43 @@ class AtomicPat<Intrinsic intrin, Pseudo AMInst>
     : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
           (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
 
+// These atomic cmpxchg PatFrags only care about the failure ordering.
+// The PatFrags defined by multiclass `ternary_atomic_op_ord` in
+// TargetSelectionDAG.td care about the merged memory ordering that is the
+// stronger one between success and failure. But for LoongArch LL-SC we only
+// need to care about the failure ordering as explained in PR #67391. So we
+// define these PatFrags that will be used to define cmpxchg pats below.
+multiclass ternary_atomic_op_failure_ord {
+  def NAME#_failure_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+    AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+    return Ordering == AtomicOrdering::Monotonic;
+  }]>;
+  def NAME#_failure_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+    AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+    return Ordering == AtomicOrdering::Acquire;
+  }]>;
+  def NAME#_failure_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+    AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+    return Ordering == AtomicOrdering::Release;
+  }]>;
+  def NAME#_failure_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+    AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+    return Ordering == AtomicOrdering::AcquireRelease;
+  }]>;
+  def NAME#_failure_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+    AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+    return Ordering == AtomicOrdering::SequentiallyConsistent;
+  }]>;
+}
+
+defm atomic_cmp_swap_32 : ternary_atomic_op_failure_ord;
+defm atomic_cmp_swap_64 : ternary_atomic_op_failure_ord;
+
 let Predicates = [IsLA64] in {
 def : AtomicPat;
@@ -1821,14 +1880,28 @@ def : AtomicPat;
-def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new),
-          (PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
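+//
+// For example, `%r = cmpxchg ptr %p, i64 %cmp, i64 %new acq_rel acquire` is
+// expected to match atomic_cmp_swap_64_failure_acquire and select
+// PseudoCmpXchg64 with fail_order = 4 (Acquire), so the atomic pseudo
+// expansion emits the acquire hint `dbar 20` on the failure path, while a
+// monotonic failure ordering keeps the plain `dbar 1792` (0x700) hint.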
+multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
+                            ValueType vt = GRLenVT> {
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
+}
+
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
 
 def : Pat<(int_loongarch_masked_cmpxchg_i64
-              GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
+              GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order),
           (PseudoMaskedCmpXchg32
-              GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
-def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new),
-          (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;
+              GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order)>;
 
 def : PseudoMaskedAMMinMaxPat;
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
index f11af8fe652842b9158bdedb37bd654b9a52d236..f0baf19bcf0ebebe270c494f532f6c5edcbe9c54 100644
--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -26,29 +26,27 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT: andi $a5, $a5, 255
 ; LA64-NEXT: sll.w $a5, $a5, $a0
 ; LA64-NEXT: and $a6, $a3, $a4
-; LA64-NEXT: or $a6, $a6, $a5
+; LA64-NEXT: or $a5, $a6, $a5
+; LA64-NEXT: addi.w $a6, $a3, 0
 ; LA64-NEXT: .LBB0_3: # %atomicrmw.start
 ; LA64-NEXT: # Parent Loop BB0_1 Depth=1
 ; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a5, $a2, 0
-; LA64-NEXT: bne $a5, $a3, .LBB0_5
+; LA64-NEXT: ll.w $a3, $a2, 0
+; LA64-NEXT: bne $a3, $a6, .LBB0_5
 ; LA64-NEXT: # %bb.4: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2
-; LA64-NEXT: dbar 0
-; LA64-NEXT: move $a7, $a6
+; LA64-NEXT: move $a7, $a5
 ; LA64-NEXT: sc.w $a7, $a2, 0
 ; LA64-NEXT: beqz $a7, .LBB0_3
 ; LA64-NEXT: b .LBB0_6
 ; LA64-NEXT: .LBB0_5: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
-; LA64-NEXT: dbar 1792
+; LA64-NEXT: dbar 20
 ; LA64-NEXT: .LBB0_6: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
-; LA64-NEXT: addi.w $a6, $a3, 0
-; LA64-NEXT: move $a3, $a5
-; LA64-NEXT: bne $a5, $a6, .LBB0_1
+; LA64-NEXT: bne $a3, $a6, .LBB0_1
 ; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a0
 ; LA64-NEXT: ret
   %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
   ret i8 %result
@@ -80,29 +78,27 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
 ; LA64-NEXT: sll.w $a5, $a5, $a0
 ; LA64-NEXT: and $a6, $a3, $a4
-; LA64-NEXT: or $a6, $a6, $a5
+; LA64-NEXT: or $a5, $a6, $a5
+; LA64-NEXT: addi.w $a6, $a3, 0
 ; LA64-NEXT: .LBB1_3: # %atomicrmw.start
 ; LA64-NEXT: # Parent Loop BB1_1 Depth=1
 ; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a5, $a2, 0
-; LA64-NEXT: bne $a5, $a3, .LBB1_5
+; LA64-NEXT: ll.w $a3, $a2, 0
+; LA64-NEXT: bne $a3, $a6, .LBB1_5
 ; LA64-NEXT: # %bb.4: # 
%atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2 -; LA64-NEXT: dbar 0 -; LA64-NEXT: move $a7, $a6 +; LA64-NEXT: move $a7, $a5 ; LA64-NEXT: sc.w $a7, $a2, 0 ; LA64-NEXT: beqz $a7, .LBB1_3 ; LA64-NEXT: b .LBB1_6 ; LA64-NEXT: .LBB1_5: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 -; LA64-NEXT: dbar 1792 +; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB1_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 -; LA64-NEXT: addi.w $a6, $a3, 0 -; LA64-NEXT: move $a3, $a5 -; LA64-NEXT: bne $a5, $a6, .LBB1_1 +; LA64-NEXT: bne $a3, $a6, .LBB1_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end -; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a0 ; LA64-NEXT: ret %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst ret i16 %result @@ -111,38 +107,36 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; LA64-LABEL: atomicrmw_uinc_wrap_i32: ; LA64: # %bb.0: -; LA64-NEXT: ld.w $a3, $a0, 0 -; LA64-NEXT: addi.w $a2, $a1, 0 +; LA64-NEXT: ld.w $a2, $a0, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB2_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB2_3 Depth 2 -; LA64-NEXT: addi.w $a4, $a3, 0 -; LA64-NEXT: sltu $a1, $a4, $a2 -; LA64-NEXT: xori $a1, $a1, 1 -; LA64-NEXT: addi.d $a5, $a3, 1 -; LA64-NEXT: masknez $a5, $a5, $a1 +; LA64-NEXT: addi.w $a3, $a2, 0 +; LA64-NEXT: sltu $a4, $a3, $a1 +; LA64-NEXT: xori $a4, $a4, 1 +; LA64-NEXT: addi.d $a2, $a2, 1 +; LA64-NEXT: masknez $a4, $a2, $a4 ; LA64-NEXT: .LBB2_3: # %atomicrmw.start ; LA64-NEXT: # Parent Loop BB2_1 Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -; LA64-NEXT: ll.w $a1, $a0, 0 -; LA64-NEXT: bne $a1, $a3, .LBB2_5 +; LA64-NEXT: ll.w $a2, $a0, 0 +; LA64-NEXT: bne $a2, $a3, .LBB2_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2 -; LA64-NEXT: dbar 0 -; LA64-NEXT: move $a6, $a5 -; LA64-NEXT: sc.w $a6, $a0, 0 -; LA64-NEXT: beqz $a6, .LBB2_3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB2_3 ; LA64-NEXT: b .LBB2_6 ; LA64-NEXT: .LBB2_5: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 -; LA64-NEXT: dbar 1792 +; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB2_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 -; LA64-NEXT: move $a3, $a1 -; LA64-NEXT: bne $a1, $a4, .LBB2_1 +; LA64-NEXT: bne $a2, $a3, .LBB2_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end -; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: move $a0, $a2 ; LA64-NEXT: ret %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst ret i32 %result @@ -168,14 +162,13 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; LA64-NEXT: bne $a2, $a3, .LBB3_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB3_3 Depth=2 -; LA64-NEXT: dbar 0 ; LA64-NEXT: move $a5, $a4 ; LA64-NEXT: sc.d $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB3_3 ; LA64-NEXT: b .LBB3_6 ; LA64-NEXT: .LBB3_5: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 -; LA64-NEXT: dbar 1792 +; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB3_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; LA64-NEXT: bne $a2, $a3, .LBB3_1 @@ -216,29 +209,27 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; LA64-NEXT: andi $a6, $a6, 255 ; LA64-NEXT: sll.w $a6, $a6, $a0 ; LA64-NEXT: and $a7, $a3, $a4 -; LA64-NEXT: or $a7, $a7, $a6 +; LA64-NEXT: or $a6, $a7, $a6 +; LA64-NEXT: addi.w $a7, $a3, 0 ; LA64-NEXT: .LBB4_3: # %atomicrmw.start 
; LA64-NEXT: # Parent Loop BB4_1 Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -; LA64-NEXT: ll.w $a6, $a2, 0 -; LA64-NEXT: bne $a6, $a3, .LBB4_5 +; LA64-NEXT: ll.w $a3, $a2, 0 +; LA64-NEXT: bne $a3, $a7, .LBB4_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 -; LA64-NEXT: dbar 0 -; LA64-NEXT: move $t0, $a7 +; LA64-NEXT: move $t0, $a6 ; LA64-NEXT: sc.w $t0, $a2, 0 ; LA64-NEXT: beqz $t0, .LBB4_3 ; LA64-NEXT: b .LBB4_6 ; LA64-NEXT: .LBB4_5: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 -; LA64-NEXT: dbar 1792 +; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB4_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 -; LA64-NEXT: addi.w $a7, $a3, 0 -; LA64-NEXT: move $a3, $a6 -; LA64-NEXT: bne $a6, $a7, .LBB4_1 +; LA64-NEXT: bne $a3, $a7, .LBB4_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end -; LA64-NEXT: srl.w $a0, $a6, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a0 ; LA64-NEXT: ret %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst ret i8 %result @@ -275,29 +266,27 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0 ; LA64-NEXT: sll.w $a6, $a6, $a0 ; LA64-NEXT: and $a7, $a3, $a4 -; LA64-NEXT: or $a7, $a7, $a6 +; LA64-NEXT: or $a6, $a7, $a6 +; LA64-NEXT: addi.w $a7, $a3, 0 ; LA64-NEXT: .LBB5_3: # %atomicrmw.start ; LA64-NEXT: # Parent Loop BB5_1 Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -; LA64-NEXT: ll.w $a6, $a2, 0 -; LA64-NEXT: bne $a6, $a3, .LBB5_5 +; LA64-NEXT: ll.w $a3, $a2, 0 +; LA64-NEXT: bne $a3, $a7, .LBB5_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 -; LA64-NEXT: dbar 0 -; LA64-NEXT: move $t0, $a7 +; LA64-NEXT: move $t0, $a6 ; LA64-NEXT: sc.w $t0, $a2, 0 ; LA64-NEXT: beqz $t0, .LBB5_3 ; LA64-NEXT: b .LBB5_6 ; LA64-NEXT: .LBB5_5: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 -; LA64-NEXT: dbar 1792 +; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB5_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 -; LA64-NEXT: addi.w $a7, $a3, 0 -; LA64-NEXT: move $a3, $a6 -; LA64-NEXT: bne $a6, $a7, .LBB5_1 +; LA64-NEXT: bne $a3, $a7, .LBB5_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end -; LA64-NEXT: srl.w $a0, $a6, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a0 ; LA64-NEXT: ret %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst ret i16 %result @@ -306,22 +295,22 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { ; LA64-LABEL: atomicrmw_udec_wrap_i32: ; LA64: # %bb.0: -; LA64-NEXT: ld.w $a4, $a0, 0 +; LA64-NEXT: ld.w $a2, $a0, 0 ; LA64-NEXT: addi.w $a3, $a1, 0 ; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB6_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB6_3 Depth 2 -; LA64-NEXT: addi.w $a5, $a4, 0 -; LA64-NEXT: sltu $a2, $a3, $a5 -; LA64-NEXT: addi.d $a6, $a4, -1 -; LA64-NEXT: masknez $a6, $a6, $a2 -; LA64-NEXT: maskeqz $a2, $a1, $a2 -; LA64-NEXT: or $a2, $a2, $a6 -; LA64-NEXT: sltui $a6, $a5, 1 -; LA64-NEXT: masknez $a2, $a2, $a6 -; LA64-NEXT: maskeqz $a6, $a1, $a6 -; LA64-NEXT: or $a6, $a6, $a2 +; LA64-NEXT: addi.w $a4, $a2, 0 +; LA64-NEXT: sltu $a5, $a3, $a4 +; LA64-NEXT: addi.d $a2, $a2, -1 +; LA64-NEXT: masknez $a2, $a2, $a5 +; LA64-NEXT: maskeqz $a5, $a1, $a5 +; LA64-NEXT: or $a2, $a5, $a2 +; LA64-NEXT: sltui $a5, $a4, 1 +; LA64-NEXT: masknez $a2, $a2, $a5 +; LA64-NEXT: maskeqz $a5, $a1, $a5 +; LA64-NEXT: or $a5, $a5, $a2 ; LA64-NEXT: .LBB6_3: # %atomicrmw.start ; LA64-NEXT: # Parent Loop BB6_1 
Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 @@ -329,18 +318,16 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { ; LA64-NEXT: bne $a2, $a4, .LBB6_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2 -; LA64-NEXT: dbar 0 -; LA64-NEXT: move $a7, $a6 -; LA64-NEXT: sc.w $a7, $a0, 0 -; LA64-NEXT: beqz $a7, .LBB6_3 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sc.w $a6, $a0, 0 +; LA64-NEXT: beqz $a6, .LBB6_3 ; LA64-NEXT: b .LBB6_6 ; LA64-NEXT: .LBB6_5: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 -; LA64-NEXT: dbar 1792 +; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB6_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 -; LA64-NEXT: move $a4, $a2 -; LA64-NEXT: bne $a2, $a5, .LBB6_1 +; LA64-NEXT: bne $a2, $a4, .LBB6_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end ; LA64-NEXT: move $a0, $a2 ; LA64-NEXT: ret @@ -373,14 +360,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; LA64-NEXT: bne $a2, $a3, .LBB7_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB7_3 Depth=2 -; LA64-NEXT: dbar 0 ; LA64-NEXT: move $a5, $a4 ; LA64-NEXT: sc.d $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB7_3 ; LA64-NEXT: b .LBB7_6 ; LA64-NEXT: .LBB7_5: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 -; LA64-NEXT: dbar 1792 +; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB7_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 ; LA64-NEXT: bne $a2, $a3, .LBB7_1 diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll index 76e51fe7d3e850e1b564b220b8ab2490c10e4dd2..ebb09640e6c939a78fec059dd531bee985599f63 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll @@ -21,14 +21,13 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; LA64-NEXT: and $a5, $a4, $a0 ; LA64-NEXT: bne $a5, $a1, .LBB0_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: andn $a5, $a4, $a0 ; LA64-NEXT: or $a5, $a5, $a2 ; LA64-NEXT: sc.w $a5, $a3, 0 ; LA64-NEXT: beqz $a5, .LBB0_1 ; LA64-NEXT: b .LBB0_4 ; LA64-NEXT: .LBB0_3: -; LA64-NEXT: dbar 1792 +; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB0_4: ; LA64-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire @@ -56,14 +55,13 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; LA64-NEXT: and $a5, $a4, $a0 ; LA64-NEXT: bne $a5, $a1, .LBB1_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: andn $a5, $a4, $a0 ; LA64-NEXT: or $a5, $a5, $a2 ; LA64-NEXT: sc.w $a5, $a3, 0 ; LA64-NEXT: beqz $a5, .LBB1_1 ; LA64-NEXT: b .LBB1_4 ; LA64-NEXT: .LBB1_3: -; LA64-NEXT: dbar 1792 +; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB1_4: ; LA64-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire @@ -73,17 +71,17 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_acquire_acquire: ; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 ; LA64-NEXT: bne $a3, $a1, .LBB2_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.w $a4, $a0, 0 ; LA64-NEXT: beqz $a4, .LBB2_1 ; LA64-NEXT: b .LBB2_4 ; LA64-NEXT: .LBB2_3: -; 
LA64-NEXT: dbar 1792 +; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB2_4: ; LA64-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire @@ -97,19 +95,124 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; LA64-NEXT: ll.d $a3, $a0, 0 ; LA64-NEXT: bne $a3, $a1, .LBB3_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.d $a4, $a0, 0 ; LA64-NEXT: beqz $a4, .LBB3_1 ; LA64-NEXT: b .LBB3_4 ; LA64-NEXT: .LBB3_3: -; LA64-NEXT: dbar 1792 +; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB3_4: ; LA64-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire ret void } +define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_acquire_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: andi $a2, $a2, 255 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: ori $a4, $zero, 255 +; LA64-NEXT: sll.w $a0, $a4, $a0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a3, 0 +; LA64-NEXT: and $a5, $a4, $a0 +; LA64-NEXT: bne $a5, $a1, .LBB4_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; LA64-NEXT: andn $a5, $a4, $a0 +; LA64-NEXT: or $a5, $a5, $a2 +; LA64-NEXT: sc.w $a5, $a3, 0 +; LA64-NEXT: beqz $a5, .LBB4_1 +; LA64-NEXT: b .LBB4_4 +; LA64-NEXT: .LBB4_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB4_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic + ret void +} + +define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_acquire_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a0, $a4, $a0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a3, 0 +; LA64-NEXT: and $a5, $a4, $a0 +; LA64-NEXT: bne $a5, $a1, .LBB5_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; LA64-NEXT: andn $a5, $a4, $a0 +; LA64-NEXT: or $a5, $a5, $a2 +; LA64-NEXT: sc.w $a5, $a3, 0 +; LA64-NEXT: beqz $a5, .LBB5_1 +; LA64-NEXT: b .LBB5_4 +; LA64-NEXT: .LBB5_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB5_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic + ret void +} + +define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_acquire_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB6_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.w $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB6_1 +; LA64-NEXT: b .LBB6_4 +; LA64-NEXT: .LBB6_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB6_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic + ret void +} + +define void 
@cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_acquire_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB7_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.d $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB7_1 +; LA64-NEXT: b .LBB7_4 +; LA64-NEXT: .LBB7_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB7_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic + ret void +} + define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8: ; LA64: # %bb.0: @@ -125,20 +228,19 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind ; LA64-NEXT: andi $a1, $a1, 255 ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a5, $a3, 0 ; LA64-NEXT: and $a6, $a5, $a4 -; LA64-NEXT: bne $a6, $a1, .LBB4_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -; LA64-NEXT: dbar 0 +; LA64-NEXT: bne $a6, $a1, .LBB8_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 ; LA64-NEXT: andn $a6, $a5, $a4 ; LA64-NEXT: or $a6, $a6, $a2 ; LA64-NEXT: sc.w $a6, $a3, 0 -; LA64-NEXT: beqz $a6, .LBB4_1 -; LA64-NEXT: b .LBB4_4 -; LA64-NEXT: .LBB4_3: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB4_4: +; LA64-NEXT: beqz $a6, .LBB8_1 +; LA64-NEXT: b .LBB8_4 +; LA64-NEXT: .LBB8_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: .LBB8_4: ; LA64-NEXT: srl.w $a0, $a5, $a0 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire @@ -162,20 +264,19 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a5, $a3, 0 ; LA64-NEXT: and $a6, $a5, $a4 -; LA64-NEXT: bne $a6, $a1, .LBB5_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -; LA64-NEXT: dbar 0 +; LA64-NEXT: bne $a6, $a1, .LBB9_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 ; LA64-NEXT: andn $a6, $a5, $a4 ; LA64-NEXT: or $a6, $a6, $a2 ; LA64-NEXT: sc.w $a6, $a3, 0 -; LA64-NEXT: beqz $a6, .LBB5_1 -; LA64-NEXT: b .LBB5_4 -; LA64-NEXT: .LBB5_3: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB5_4: +; LA64-NEXT: beqz $a6, .LBB9_1 +; LA64-NEXT: b .LBB9_4 +; LA64-NEXT: .LBB9_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: .LBB9_4: ; LA64-NEXT: srl.w $a0, $a5, $a0 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire @@ -186,19 +287,19 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: ; LA64: # %bb.0: -; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB6_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -; LA64-NEXT: dbar 0 +; LA64-NEXT: addi.w $a3, $a1, 0 +; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a1, $a0, 0 +; LA64-NEXT: bne $a1, $a3, .LBB10_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.w $a4, $a0, 
0 -; LA64-NEXT: beqz $a4, .LBB6_1 -; LA64-NEXT: b .LBB6_4 -; LA64-NEXT: .LBB6_3: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB6_4: -; LA64-NEXT: move $a0, $a3 +; LA64-NEXT: beqz $a4, .LBB10_1 +; LA64-NEXT: b .LBB10_4 +; LA64-NEXT: .LBB10_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: .LBB10_4: +; LA64-NEXT: move $a0, $a1 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire %res = extractvalue { i32, i1 } %tmp, 0 @@ -208,18 +309,17 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti64: ; LA64: # %bb.0: -; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.d $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB7_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 -; LA64-NEXT: dbar 0 +; LA64-NEXT: bne $a3, $a1, .LBB11_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.d $a4, $a0, 0 -; LA64-NEXT: beqz $a4, .LBB7_1 -; LA64-NEXT: b .LBB7_4 -; LA64-NEXT: .LBB7_3: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB7_4: +; LA64-NEXT: beqz $a4, .LBB11_1 +; LA64-NEXT: b .LBB11_4 +; LA64-NEXT: .LBB11_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: .LBB11_4: ; LA64-NEXT: move $a0, $a3 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire @@ -242,20 +342,19 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: addi.w $a2, $a4, 0 -; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a5, $a3, 0 ; LA64-NEXT: and $a6, $a5, $a2 -; LA64-NEXT: bne $a6, $a1, .LBB8_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 -; LA64-NEXT: dbar 0 +; LA64-NEXT: bne $a6, $a1, .LBB12_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ; LA64-NEXT: andn $a6, $a5, $a2 ; LA64-NEXT: or $a6, $a6, $a0 ; LA64-NEXT: sc.w $a6, $a3, 0 -; LA64-NEXT: beqz $a6, .LBB8_1 -; LA64-NEXT: b .LBB8_4 -; LA64-NEXT: .LBB8_3: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB8_4: +; LA64-NEXT: beqz $a6, .LBB12_1 +; LA64-NEXT: b .LBB12_4 +; LA64-NEXT: .LBB12_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: .LBB12_4: ; LA64-NEXT: and $a0, $a5, $a4 ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: xor $a0, $a1, $a0 @@ -282,20 +381,19 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: addi.w $a2, $a4, 0 -; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a5, $a3, 0 ; LA64-NEXT: and $a6, $a5, $a2 -; LA64-NEXT: bne $a6, $a1, .LBB9_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 -; LA64-NEXT: dbar 0 +; LA64-NEXT: bne $a6, $a1, .LBB13_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ; LA64-NEXT: andn $a6, $a5, $a2 ; LA64-NEXT: or $a6, $a6, $a0 ; LA64-NEXT: sc.w $a6, $a3, 0 -; LA64-NEXT: beqz $a6, .LBB9_1 -; LA64-NEXT: b .LBB9_4 -; LA64-NEXT: .LBB9_3: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB9_4: +; LA64-NEXT: beqz $a6, .LBB13_1 +; LA64-NEXT: b .LBB13_4 +; LA64-NEXT: .LBB13_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: .LBB13_4: ; LA64-NEXT: and $a0, $a5, $a4 ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: xor $a0, $a1, $a0 @@ -309,20 +407,19 @@ define i1 
@cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: ; LA64: # %bb.0: -; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB10_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; LA64-NEXT: dbar 0 +; LA64-NEXT: bne $a3, $a1, .LBB14_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.w $a4, $a0, 0 -; LA64-NEXT: beqz $a4, .LBB10_1 -; LA64-NEXT: b .LBB10_4 -; LA64-NEXT: .LBB10_3: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB10_4: -; LA64-NEXT: addi.w $a0, $a1, 0 -; LA64-NEXT: xor $a0, $a3, $a0 +; LA64-NEXT: beqz $a4, .LBB14_1 +; LA64-NEXT: b .LBB14_4 +; LA64-NEXT: .LBB14_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: .LBB14_4: +; LA64-NEXT: xor $a0, $a3, $a1 ; LA64-NEXT: sltui $a0, $a0, 1 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire @@ -333,18 +430,17 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti1: ; LA64: # %bb.0: -; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.d $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB11_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; LA64-NEXT: dbar 0 +; LA64-NEXT: bne $a3, $a1, .LBB15_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.d $a4, $a0, 0 -; LA64-NEXT: beqz $a4, .LBB11_1 -; LA64-NEXT: b .LBB11_4 -; LA64-NEXT: .LBB11_3: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB11_4: +; LA64-NEXT: beqz $a4, .LBB15_1 +; LA64-NEXT: b .LBB15_4 +; LA64-NEXT: .LBB15_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: .LBB15_4: ; LA64-NEXT: xor $a0, $a3, $a1 ; LA64-NEXT: sltui $a0, $a0, 1 ; LA64-NEXT: ret @@ -352,3 +448,345 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw %res = extractvalue { i64, i1 } %tmp, 1 ret i1 %res } + +define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_monotonic_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: andi $a2, $a2, 255 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: ori $a4, $zero, 255 +; LA64-NEXT: sll.w $a0, $a4, $a0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a3, 0 +; LA64-NEXT: and $a5, $a4, $a0 +; LA64-NEXT: bne $a5, $a1, .LBB16_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; LA64-NEXT: andn $a5, $a4, $a0 +; LA64-NEXT: or $a5, $a5, $a2 +; LA64-NEXT: sc.w $a5, $a3, 0 +; LA64-NEXT: beqz $a5, .LBB16_1 +; LA64-NEXT: b .LBB16_4 +; LA64-NEXT: .LBB16_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB16_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic + ret void +} + +define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_monotonic_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: 
addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a0, $a4, $a0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a3, 0 +; LA64-NEXT: and $a5, $a4, $a0 +; LA64-NEXT: bne $a5, $a1, .LBB17_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 +; LA64-NEXT: andn $a5, $a4, $a0 +; LA64-NEXT: or $a5, $a5, $a2 +; LA64-NEXT: sc.w $a5, $a3, 0 +; LA64-NEXT: beqz $a5, .LBB17_1 +; LA64-NEXT: b .LBB17_4 +; LA64-NEXT: .LBB17_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB17_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic + ret void +} + +define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB18_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.w $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB18_1 +; LA64-NEXT: b .LBB18_4 +; LA64-NEXT: .LBB18_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB18_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic + ret void +} + +define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB19_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.d $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB19_1 +; LA64-NEXT: b .LBB19_4 +; LA64-NEXT: .LBB19_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB19_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic + ret void +} + +define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti8: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a4, $zero, 255 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: andi $a2, $a2, 255 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a3, 0 +; LA64-NEXT: and $a6, $a5, $a4 +; LA64-NEXT: bne $a6, $a1, .LBB20_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; LA64-NEXT: andn $a6, $a5, $a4 +; LA64-NEXT: or $a6, $a6, $a2 +; LA64-NEXT: sc.w $a6, $a3, 0 +; LA64-NEXT: beqz $a6, .LBB20_1 +; LA64-NEXT: b .LBB20_4 +; LA64-NEXT: .LBB20_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB20_4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic + %res = extractvalue { i8, i1 } %tmp, 0 + ret i8 %res +} + +define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: 
cmpxchg_i16_monotonic_monotonic_reti16: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a3, 0 +; LA64-NEXT: and $a6, $a5, $a4 +; LA64-NEXT: bne $a6, $a1, .LBB21_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; LA64-NEXT: andn $a6, $a5, $a4 +; LA64-NEXT: or $a6, $a6, $a2 +; LA64-NEXT: sc.w $a6, $a3, 0 +; LA64-NEXT: beqz $a6, .LBB21_1 +; LA64-NEXT: b .LBB21_4 +; LA64-NEXT: .LBB21_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB21_4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic + %res = extractvalue { i16, i1 } %tmp, 0 + ret i16 %res +} + +define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $a1, 0 +; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a1, $a0, 0 +; LA64-NEXT: bne $a1, $a3, .LBB22_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.w $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB22_1 +; LA64-NEXT: b .LBB22_4 +; LA64-NEXT: .LBB22_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB22_4: +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic + %res = extractvalue { i32, i1 } %tmp, 0 + ret i32 %res +} + +define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: +; LA64: # %bb.0: +; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB23_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.d $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB23_1 +; LA64-NEXT: b .LBB23_4 +; LA64-NEXT: .LBB23_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB23_4: +; LA64-NEXT: move $a0, $a3 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic + %res = extractvalue { i64, i1 } %tmp, 0 + ret i64 %res +} + +define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti1: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a4, $zero, 255 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: andi $a2, $a2, 255 +; LA64-NEXT: sll.w $a0, $a2, $a0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: addi.w $a2, $a4, 0 +; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a3, 0 +; LA64-NEXT: and $a6, $a5, $a2 +; LA64-NEXT: bne $a6, $a1, .LBB24_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; LA64-NEXT: andn $a6, $a5, $a2 +; LA64-NEXT: or $a6, $a6, $a0 +; LA64-NEXT: sc.w $a6, $a3, 0 +; LA64-NEXT: beqz $a6, .LBB24_1 +; LA64-NEXT: b .LBB24_4 +; LA64-NEXT: .LBB24_3: +; LA64-NEXT: 
dbar 1792 +; LA64-NEXT: .LBB24_4: +; LA64-NEXT: and $a0, $a5, $a4 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: xor $a0, $a1, $a0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic + %res = extractvalue { i8, i1 } %tmp, 1 + ret i1 %res +} + +define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti1: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 +; LA64-NEXT: sll.w $a0, $a2, $a0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: addi.w $a2, $a4, 0 +; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a3, 0 +; LA64-NEXT: and $a6, $a5, $a2 +; LA64-NEXT: bne $a6, $a1, .LBB25_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 +; LA64-NEXT: andn $a6, $a5, $a2 +; LA64-NEXT: or $a6, $a6, $a0 +; LA64-NEXT: sc.w $a6, $a3, 0 +; LA64-NEXT: beqz $a6, .LBB25_1 +; LA64-NEXT: b .LBB25_4 +; LA64-NEXT: .LBB25_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB25_4: +; LA64-NEXT: and $a0, $a5, $a4 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: xor $a0, $a1, $a0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic + %res = extractvalue { i16, i1 } %tmp, 1 + ret i1 %res +} + +define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB26_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.w $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB26_1 +; LA64-NEXT: b .LBB26_4 +; LA64-NEXT: .LBB26_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB26_4: +; LA64-NEXT: xor $a0, $a3, $a1 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic + %res = extractvalue { i32, i1 } %tmp, 1 + ret i1 %res +} + +define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: +; LA64: # %bb.0: +; LA64-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB27_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.d $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB27_1 +; LA64-NEXT: b .LBB27_4 +; LA64-NEXT: .LBB27_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB27_4: +; LA64-NEXT: xor $a0, $a3, $a1 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic + %res = extractvalue { i64, i1 } %tmp, 1 + ret i1 %res +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll index 9767717395b67e5ad93cf6f4cf07da914c03c51e..4d8160d70803407fbce43a727f4adf447a4ce559 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll @@ -2,8 +2,6 @@ ; RUN: llc 
--mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F ; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D -;; Add more test cases after supporting different AtomicOrdering. - define float @float_fadd_acquire(ptr %p) nounwind { ; LA64F-LABEL: float_fadd_acquire: ; LA64F: # %bb.0: @@ -18,6 +16,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB0_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB0_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -25,19 +24,17 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64F-NEXT: bne $a3, $a2, .LBB0_5 ; LA64F-NEXT: # %bb.4: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB0_3 Depth=2 -; LA64F-NEXT: dbar 0 ; LA64F-NEXT: move $a4, $a1 ; LA64F-NEXT: sc.w $a4, $a0, 0 ; LA64F-NEXT: beqz $a4, .LBB0_3 ; LA64F-NEXT: b .LBB0_6 ; LA64F-NEXT: .LBB0_5: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 -; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: dbar 20 ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB0_1 +; LA64F-NEXT: bne $a3, $a2, .LBB0_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -54,6 +51,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB0_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB0_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -61,19 +59,17 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64D-NEXT: bne $a3, $a2, .LBB0_5 ; LA64D-NEXT: # %bb.4: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB0_3 Depth=2 -; LA64D-NEXT: dbar 0 ; LA64D-NEXT: move $a4, $a1 ; LA64D-NEXT: sc.w $a4, $a0, 0 ; LA64D-NEXT: beqz $a4, .LBB0_3 ; LA64D-NEXT: b .LBB0_6 ; LA64D-NEXT: .LBB0_5: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 -; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: dbar 20 ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB0_1 +; LA64D-NEXT: bne $a3, $a2, .LBB0_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4 @@ -94,6 +90,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB1_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB1_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -101,19 +98,17 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64F-NEXT: bne $a3, $a2, .LBB1_5 ; LA64F-NEXT: # %bb.4: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB1_3 Depth=2 -; LA64F-NEXT: dbar 0 ; LA64F-NEXT: move $a4, $a1 ; LA64F-NEXT: sc.w $a4, $a0, 0 ; LA64F-NEXT: beqz $a4, .LBB1_3 ; LA64F-NEXT: b .LBB1_6 ; LA64F-NEXT: .LBB1_5: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 -; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: dbar 20 ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; 
LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB1_1 +; LA64F-NEXT: bne $a3, $a2, .LBB1_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -130,6 +125,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB1_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB1_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -137,19 +133,17 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64D-NEXT: bne $a3, $a2, .LBB1_5 ; LA64D-NEXT: # %bb.4: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB1_3 Depth=2 -; LA64D-NEXT: dbar 0 ; LA64D-NEXT: move $a4, $a1 ; LA64D-NEXT: sc.w $a4, $a0, 0 ; LA64D-NEXT: beqz $a4, .LBB1_3 ; LA64D-NEXT: b .LBB1_6 ; LA64D-NEXT: .LBB1_5: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 -; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: dbar 20 ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB1_1 +; LA64D-NEXT: bne $a3, $a2, .LBB1_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4 @@ -171,6 +165,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB2_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB2_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -178,19 +173,17 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64F-NEXT: bne $a3, $a2, .LBB2_5 ; LA64F-NEXT: # %bb.4: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB2_3 Depth=2 -; LA64F-NEXT: dbar 0 ; LA64F-NEXT: move $a4, $a1 ; LA64F-NEXT: sc.w $a4, $a0, 0 ; LA64F-NEXT: beqz $a4, .LBB2_3 ; LA64F-NEXT: b .LBB2_6 ; LA64F-NEXT: .LBB2_5: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 -; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: dbar 20 ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB2_1 +; LA64F-NEXT: bne $a3, $a2, .LBB2_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -208,6 +201,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB2_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB2_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -215,19 +209,17 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64D-NEXT: bne $a3, $a2, .LBB2_5 ; LA64D-NEXT: # %bb.4: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB2_3 Depth=2 -; LA64D-NEXT: dbar 0 ; LA64D-NEXT: move $a4, $a1 ; LA64D-NEXT: sc.w $a4, $a0, 0 ; LA64D-NEXT: beqz $a4, .LBB2_3 ; LA64D-NEXT: b .LBB2_6 ; LA64D-NEXT: .LBB2_5: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 -; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: dbar 20 ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB2_1 +; LA64D-NEXT: bne $a3, $a2, .LBB2_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = 
atomicrmw fmin ptr %p, float 1.0 acquire, align 4 @@ -249,6 +241,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB3_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB3_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -256,19 +249,17 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64F-NEXT: bne $a3, $a2, .LBB3_5 ; LA64F-NEXT: # %bb.4: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB3_3 Depth=2 -; LA64F-NEXT: dbar 0 ; LA64F-NEXT: move $a4, $a1 ; LA64F-NEXT: sc.w $a4, $a0, 0 ; LA64F-NEXT: beqz $a4, .LBB3_3 ; LA64F-NEXT: b .LBB3_6 ; LA64F-NEXT: .LBB3_5: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 -; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: dbar 20 ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB3_1 +; LA64F-NEXT: bne $a3, $a2, .LBB3_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -286,6 +277,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB3_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB3_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -293,19 +285,17 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64D-NEXT: bne $a3, $a2, .LBB3_5 ; LA64D-NEXT: # %bb.4: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB3_3 Depth=2 -; LA64D-NEXT: dbar 0 ; LA64D-NEXT: move $a4, $a1 ; LA64D-NEXT: sc.w $a4, $a0, 0 ; LA64D-NEXT: beqz $a4, .LBB3_3 ; LA64D-NEXT: b .LBB3_6 ; LA64D-NEXT: .LBB3_5: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 -; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: dbar 20 ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB3_1 +; LA64D-NEXT: bne $a3, $a2, .LBB3_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4 @@ -689,3 +679,2715 @@ define double @double_fmax_acquire(ptr %p) nounwind { %v = atomicrmw fmax ptr %p, double 1.0 acquire, align 4 ret double %v } + +define float @float_fadd_release(ptr %p) nounwind { +; LA64F-LABEL: float_fadd_release: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB8_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB8_3 Depth 2 +; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB8_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB8_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB8_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB8_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB8_3 +; LA64F-NEXT: b .LBB8_6 +; LA64F-NEXT: .LBB8_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1 +; LA64F-NEXT: 
dbar 1792 +; LA64F-NEXT: .LBB8_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB8_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fadd_release: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB8_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB8_3 Depth 2 +; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB8_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB8_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB8_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB8_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB8_3 +; LA64D-NEXT: b .LBB8_6 +; LA64D-NEXT: .LBB8_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB8_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB8_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB8_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB8_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 release, align 4 + ret float %v +} + +define float @float_fsub_release(ptr %p) nounwind { +; LA64F-LABEL: float_fsub_release: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) +; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI9_0) +; LA64F-NEXT: fld.s $fa1, $a1, 0 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB9_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB9_3 Depth 2 +; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB9_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB9_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB9_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB9_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB9_3 +; LA64F-NEXT: b .LBB9_6 +; LA64F-NEXT: .LBB9_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB9_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB9_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fsub_release: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) +; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI9_0) +; LA64D-NEXT: fld.s $fa1, $a1, 0 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB9_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB9_3 Depth 2 +; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB9_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB9_1 Depth=1 +; LA64D-NEXT: # => This 
Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB9_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB9_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB9_3 +; LA64D-NEXT: b .LBB9_6 +; LA64D-NEXT: .LBB9_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB9_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB9_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 release, align 4 + ret float %v +} + +define float @float_fmin_release(ptr %p) nounwind { +; LA64F-LABEL: float_fmin_release: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB10_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB10_3 Depth 2 +; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB10_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB10_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB10_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB10_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB10_3 +; LA64F-NEXT: b .LBB10_6 +; LA64F-NEXT: .LBB10_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB10_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB10_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fmin_release: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB10_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB10_3 Depth 2 +; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB10_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB10_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB10_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB10_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB10_3 +; LA64D-NEXT: b .LBB10_6 +; LA64D-NEXT: .LBB10_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB10_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB10_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 release, align 4 + ret float %v +} + +define float @float_fmax_release(ptr %p) nounwind { +; LA64F-LABEL: 
float_fmax_release: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB11_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB11_3 Depth 2 +; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB11_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB11_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB11_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB11_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB11_3 +; LA64F-NEXT: b .LBB11_6 +; LA64F-NEXT: .LBB11_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB11_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB11_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fmax_release: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB11_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB11_3 Depth 2 +; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB11_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB11_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB11_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB11_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB11_3 +; LA64D-NEXT: b .LBB11_6 +; LA64D-NEXT: .LBB11_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB11_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB11_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 release, align 4 + ret float %v +} + +define double @double_fadd_release(ptr %p) nounwind { +; LA64F-LABEL: double_fadd_release: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 16 +; LA64F-NEXT: addi.d $s3, $sp, 8 +; LA64F-NEXT: ori $s4, $zero, 3 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB12_1: # %atomicrmw.start +; LA64F-NEXT: # =>This 
Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 16 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $zero +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 16 +; LA64F-NEXT: beqz $a1, .LBB12_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fadd_release: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 3 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB12_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $zero +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB12_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, double 1.0 release, align 4 + ret double %v +} + +define double @double_fsub_release(ptr %p) nounwind { +; LA64F-LABEL: double_fsub_release: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, -1025 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 16 +; 
LA64F-NEXT: addi.d $s3, $sp, 8 +; LA64F-NEXT: ori $s4, $zero, 3 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB13_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 16 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $zero +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 16 +; LA64F-NEXT: beqz $a1, .LBB13_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fsub_release: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI13_0) +; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI13_0) +; LA64D-NEXT: fld.d $fs0, $a0, 0 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 3 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB13_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $zero +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB13_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, double 1.0 release, align 4 + ret double %v +} + +define double @double_fmin_release(ptr %p) nounwind { +; LA64F-LABEL: double_fmin_release: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 
8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 16 +; LA64F-NEXT: addi.d $s3, $sp, 8 +; LA64F-NEXT: ori $s4, $zero, 3 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB14_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 16 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(fmin) +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $zero +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 16 +; LA64F-NEXT: beqz $a1, .LBB14_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fmin_release: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 3 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB14_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $zero +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB14_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, double 1.0 release, align 4 + ret double %v +} + +define double @double_fmax_release(ptr %p) nounwind { +; LA64F-LABEL: double_fmax_release: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; 
LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 16 +; LA64F-NEXT: addi.d $s3, $sp, 8 +; LA64F-NEXT: ori $s4, $zero, 3 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB15_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 16 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(fmax) +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $zero +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 16 +; LA64F-NEXT: beqz $a1, .LBB15_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fmax_release: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 3 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB15_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $zero +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB15_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, double 1.0 release, align 4 + ret double %v +} + +define float @float_fadd_acq_rel(ptr %p) nounwind { +; LA64F-LABEL: float_fadd_acq_rel: +; LA64F: # %bb.0: +; 
LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB16_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB16_3 Depth 2 +; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB16_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB16_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB16_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB16_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB16_3 +; LA64F-NEXT: b .LBB16_6 +; LA64F-NEXT: .LBB16_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 +; LA64F-NEXT: dbar 20 +; LA64F-NEXT: .LBB16_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB16_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fadd_acq_rel: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB16_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB16_3 Depth 2 +; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB16_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB16_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB16_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB16_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB16_3 +; LA64D-NEXT: b .LBB16_6 +; LA64D-NEXT: .LBB16_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 +; LA64D-NEXT: dbar 20 +; LA64D-NEXT: .LBB16_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB16_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4 + ret float %v +} + +define float @float_fsub_acq_rel(ptr %p) nounwind { +; LA64F-LABEL: float_fsub_acq_rel: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) +; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) +; LA64F-NEXT: fld.s $fa1, $a1, 0 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB17_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB17_3 Depth 2 +; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB17_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB17_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB17_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB17_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB17_3 +; 
LA64F-NEXT: b .LBB17_6 +; LA64F-NEXT: .LBB17_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 +; LA64F-NEXT: dbar 20 +; LA64F-NEXT: .LBB17_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB17_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fsub_acq_rel: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) +; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) +; LA64D-NEXT: fld.s $fa1, $a1, 0 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB17_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB17_3 Depth 2 +; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB17_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB17_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB17_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB17_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB17_3 +; LA64D-NEXT: b .LBB17_6 +; LA64D-NEXT: .LBB17_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 +; LA64D-NEXT: dbar 20 +; LA64D-NEXT: .LBB17_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB17_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4 + ret float %v +} + +define float @float_fmin_acq_rel(ptr %p) nounwind { +; LA64F-LABEL: float_fmin_acq_rel: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB18_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB18_3 Depth 2 +; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB18_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB18_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB18_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB18_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB18_3 +; LA64F-NEXT: b .LBB18_6 +; LA64F-NEXT: .LBB18_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 +; LA64F-NEXT: dbar 20 +; LA64F-NEXT: .LBB18_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB18_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fmin_acq_rel: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB18_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB18_3 Depth 2 +; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 +; 
LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB18_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB18_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB18_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB18_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB18_3 +; LA64D-NEXT: b .LBB18_6 +; LA64D-NEXT: .LBB18_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 +; LA64D-NEXT: dbar 20 +; LA64D-NEXT: .LBB18_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB18_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4 + ret float %v +} + +define float @float_fmax_acq_rel(ptr %p) nounwind { +; LA64F-LABEL: float_fmax_acq_rel: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB19_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB19_3 Depth 2 +; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB19_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB19_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB19_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB19_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB19_3 +; LA64F-NEXT: b .LBB19_6 +; LA64F-NEXT: .LBB19_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 +; LA64F-NEXT: dbar 20 +; LA64F-NEXT: .LBB19_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB19_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fmax_acq_rel: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB19_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB19_3 Depth 2 +; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB19_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB19_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB19_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB19_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB19_3 +; LA64D-NEXT: b .LBB19_6 +; LA64D-NEXT: .LBB19_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 +; LA64D-NEXT: dbar 20 +; LA64D-NEXT: .LBB19_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB19_1 
+; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4 + ret float %v +} + +define double @double_fadd_acq_rel(ptr %p) nounwind { +; LA64F-LABEL: double_fadd_acq_rel: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 8 +; LA64F-NEXT: addi.d $s3, $sp, 0 +; LA64F-NEXT: ori $s4, $zero, 4 +; LA64F-NEXT: ori $s5, $zero, 2 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB20_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $s5 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB20_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fadd_acq_rel: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 8 +; LA64D-NEXT: addi.d $s2, $sp, 0 +; LA64D-NEXT: ori $s3, $zero, 4 +; LA64D-NEXT: ori $s4, $zero, 2 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB20_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 0 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s4 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 8 +; LA64D-NEXT: beqz $a0, .LBB20_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; 
LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, double 1.0 acq_rel, align 4 + ret double %v +} + +define double @double_fsub_acq_rel(ptr %p) nounwind { +; LA64F-LABEL: double_fsub_acq_rel: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, -1025 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 8 +; LA64F-NEXT: addi.d $s3, $sp, 0 +; LA64F-NEXT: ori $s4, $zero, 4 +; LA64F-NEXT: ori $s5, $zero, 2 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB21_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $s5 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB21_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fsub_acq_rel: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_0) +; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_0) +; LA64D-NEXT: fld.d $fs0, $a0, 0 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 8 +; LA64D-NEXT: addi.d $s2, $sp, 0 +; LA64D-NEXT: ori $s3, $zero, 4 +; LA64D-NEXT: ori $s4, $zero, 2 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB21_1: # %atomicrmw.start +; LA64D-NEXT: # =>This 
Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 0 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s4 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 8 +; LA64D-NEXT: beqz $a0, .LBB21_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, double 1.0 acq_rel, align 4 + ret double %v +} + +define double @double_fmin_acq_rel(ptr %p) nounwind { +; LA64F-LABEL: double_fmin_acq_rel: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 8 +; LA64F-NEXT: addi.d $s3, $sp, 0 +; LA64F-NEXT: ori $s4, $zero, 4 +; LA64F-NEXT: ori $s5, $zero, 2 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB22_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(fmin) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $s5 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB22_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fmin_acq_rel: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill 
+; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 8 +; LA64D-NEXT: addi.d $s2, $sp, 0 +; LA64D-NEXT: ori $s3, $zero, 4 +; LA64D-NEXT: ori $s4, $zero, 2 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB22_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 0 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s4 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 8 +; LA64D-NEXT: beqz $a0, .LBB22_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, double 1.0 acq_rel, align 4 + ret double %v +} + +define double @double_fmax_acq_rel(ptr %p) nounwind { +; LA64F-LABEL: double_fmax_acq_rel: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 8 +; LA64F-NEXT: addi.d $s3, $sp, 0 +; LA64F-NEXT: ori $s4, $zero, 4 +; LA64F-NEXT: ori $s5, $zero, 2 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB23_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(fmax) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $s5 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB23_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fmax_acq_rel: +; LA64D: # %bb.0: +; 
LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 8 +; LA64D-NEXT: addi.d $s2, $sp, 0 +; LA64D-NEXT: ori $s3, $zero, 4 +; LA64D-NEXT: ori $s4, $zero, 2 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB23_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 0 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s4 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 8 +; LA64D-NEXT: beqz $a0, .LBB23_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, double 1.0 acq_rel, align 4 + ret double %v +} + +define float @float_fadd_seq_cst(ptr %p) nounwind { +; LA64F-LABEL: float_fadd_seq_cst: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB24_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB24_3 Depth 2 +; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB24_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB24_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB24_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB24_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB24_3 +; LA64F-NEXT: b .LBB24_6 +; LA64F-NEXT: .LBB24_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 +; LA64F-NEXT: dbar 20 +; LA64F-NEXT: .LBB24_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB24_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fadd_seq_cst: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 
16 +; LA64D-NEXT: .LBB24_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB24_3 Depth 2 +; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB24_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB24_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB24_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB24_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB24_3 +; LA64D-NEXT: b .LBB24_6 +; LA64D-NEXT: .LBB24_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 +; LA64D-NEXT: dbar 20 +; LA64D-NEXT: .LBB24_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB24_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4 + ret float %v +} + +define float @float_fsub_seq_cst(ptr %p) nounwind { +; LA64F-LABEL: float_fsub_seq_cst: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0) +; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI25_0) +; LA64F-NEXT: fld.s $fa1, $a1, 0 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB25_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB25_3 Depth 2 +; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB25_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB25_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB25_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB25_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB25_3 +; LA64F-NEXT: b .LBB25_6 +; LA64F-NEXT: .LBB25_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 +; LA64F-NEXT: dbar 20 +; LA64F-NEXT: .LBB25_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB25_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fsub_seq_cst: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0) +; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI25_0) +; LA64D-NEXT: fld.s $fa1, $a1, 0 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB25_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB25_3 Depth 2 +; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB25_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB25_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB25_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB25_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB25_3 +; LA64D-NEXT: b .LBB25_6 +; LA64D-NEXT: .LBB25_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 +; LA64D-NEXT: dbar 20 +; 
LA64D-NEXT: .LBB25_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB25_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4 + ret float %v +} + +define float @float_fmin_seq_cst(ptr %p) nounwind { +; LA64F-LABEL: float_fmin_seq_cst: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB26_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB26_3 Depth 2 +; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB26_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB26_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB26_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB26_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB26_3 +; LA64F-NEXT: b .LBB26_6 +; LA64F-NEXT: .LBB26_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 +; LA64F-NEXT: dbar 20 +; LA64F-NEXT: .LBB26_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB26_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fmin_seq_cst: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB26_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB26_3 Depth 2 +; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB26_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB26_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB26_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB26_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB26_3 +; LA64D-NEXT: b .LBB26_6 +; LA64D-NEXT: .LBB26_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 +; LA64D-NEXT: dbar 20 +; LA64D-NEXT: .LBB26_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB26_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4 + ret float %v +} + +define float @float_fmax_seq_cst(ptr %p) nounwind { +; LA64F-LABEL: float_fmax_seq_cst: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB27_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB27_3 Depth 2 +; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 +; 
LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB27_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB27_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB27_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB27_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB27_3 +; LA64F-NEXT: b .LBB27_6 +; LA64F-NEXT: .LBB27_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 +; LA64F-NEXT: dbar 20 +; LA64F-NEXT: .LBB27_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB27_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fmax_seq_cst: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB27_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB27_3 Depth 2 +; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB27_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB27_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB27_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB27_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB27_3 +; LA64D-NEXT: b .LBB27_6 +; LA64D-NEXT: .LBB27_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 +; LA64D-NEXT: dbar 20 +; LA64D-NEXT: .LBB27_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB27_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4 + ret float %v +} + +define double @double_fadd_seq_cst(ptr %p) nounwind { +; LA64F-LABEL: double_fadd_seq_cst: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 16 +; LA64F-NEXT: addi.d $s3, $sp, 8 +; LA64F-NEXT: ori $s4, $zero, 5 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB28_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 16 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $s4 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, 
$sp, 16 +; LA64F-NEXT: beqz $a1, .LBB28_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fadd_seq_cst: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 5 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB28_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s3 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB28_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, double 1.0 seq_cst, align 4 + ret double %v +} + +define double @double_fsub_seq_cst(ptr %p) nounwind { +; LA64F-LABEL: double_fsub_seq_cst: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, -1025 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 16 +; LA64F-NEXT: addi.d $s3, $sp, 8 +; LA64F-NEXT: ori $s4, $zero, 5 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB29_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 16 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; 
LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $s4 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 16 +; LA64F-NEXT: beqz $a1, .LBB29_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fsub_seq_cst: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI29_0) +; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI29_0) +; LA64D-NEXT: fld.d $fs0, $a0, 0 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 5 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB29_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s3 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB29_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, double 1.0 seq_cst, align 4 + ret double %v +} + +define double @double_fmin_seq_cst(ptr %p) nounwind { +; LA64F-LABEL: double_fmin_seq_cst: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 16 +; LA64F-NEXT: addi.d $s3, $sp, 8 +; LA64F-NEXT: ori $s4, $zero, 5 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB30_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 16 +; LA64F-NEXT: move $a1, 
$s0 +; LA64F-NEXT: bl %plt(fmin) +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $s4 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 16 +; LA64F-NEXT: beqz $a1, .LBB30_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fmin_seq_cst: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 5 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB30_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s3 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB30_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, double 1.0 seq_cst, align 4 + ret double %v +} + +define double @double_fmax_seq_cst(ptr %p) nounwind { +; LA64F-LABEL: double_fmax_seq_cst: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 16 +; LA64F-NEXT: addi.d $s3, $sp, 8 +; LA64F-NEXT: ori $s4, 
$zero, 5 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB31_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 16 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(fmax) +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $s4 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 16 +; LA64F-NEXT: beqz $a1, .LBB31_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fmax_seq_cst: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 5 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB31_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s3 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB31_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, double 1.0 seq_cst, align 4 + ret double %v +} + +define float @float_fadd_monotonic(ptr %p) nounwind { +; LA64F-LABEL: float_fadd_monotonic: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB32_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB32_3 Depth 2 +; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; 
LA64F-NEXT: .LBB32_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB32_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB32_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB32_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB32_3 +; LA64F-NEXT: b .LBB32_6 +; LA64F-NEXT: .LBB32_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB32_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB32_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fadd_monotonic: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB32_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB32_3 Depth 2 +; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB32_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB32_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB32_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB32_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB32_3 +; LA64D-NEXT: b .LBB32_6 +; LA64D-NEXT: .LBB32_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB32_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB32_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4 + ret float %v +} + +define float @float_fsub_monotonic(ptr %p) nounwind { +; LA64F-LABEL: float_fsub_monotonic: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI33_0) +; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI33_0) +; LA64F-NEXT: fld.s $fa1, $a1, 0 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB33_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB33_3 Depth 2 +; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB33_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB33_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB33_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB33_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB33_3 +; LA64F-NEXT: b .LBB33_6 +; LA64F-NEXT: .LBB33_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB33_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB33_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fsub_monotonic: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s 
$fa0, $a0, 0 +; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI33_0) +; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI33_0) +; LA64D-NEXT: fld.s $fa1, $a1, 0 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB33_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB33_3 Depth 2 +; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB33_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB33_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB33_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB33_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB33_3 +; LA64D-NEXT: b .LBB33_6 +; LA64D-NEXT: .LBB33_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB33_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB33_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4 + ret float %v +} + +define float @float_fmin_monotonic(ptr %p) nounwind { +; LA64F-LABEL: float_fmin_monotonic: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB34_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB34_3 Depth 2 +; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB34_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB34_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB34_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB34_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB34_3 +; LA64F-NEXT: b .LBB34_6 +; LA64F-NEXT: .LBB34_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB34_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB34_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fmin_monotonic: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB34_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB34_3 Depth 2 +; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB34_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB34_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB34_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB34_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; 
LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB34_3 +; LA64D-NEXT: b .LBB34_6 +; LA64D-NEXT: .LBB34_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB34_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB34_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4 + ret float %v +} + +define float @float_fmax_monotonic(ptr %p) nounwind { +; LA64F-LABEL: float_fmax_monotonic: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB35_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB35_3 Depth 2 +; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 +; LA64F-NEXT: .LBB35_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB35_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB35_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB35_3 Depth=2 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB35_3 +; LA64F-NEXT: b .LBB35_6 +; LA64F-NEXT: .LBB35_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB35_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: bne $a3, $a2, .LBB35_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fmax_monotonic: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB35_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB35_3 Depth 2 +; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 +; LA64D-NEXT: .LBB35_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB35_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB35_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB35_3 Depth=2 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB35_3 +; LA64D-NEXT: b .LBB35_6 +; LA64D-NEXT: .LBB35_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB35_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: bne $a3, $a2, .LBB35_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4 + ret float %v +} + +define double @double_fadd_monotonic(ptr %p) nounwind { +; LA64F-LABEL: double_fadd_monotonic: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -64 +; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d 
$s0, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 8 +; LA64F-NEXT: addi.d $s3, $sp, 0 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB36_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $zero +; LA64F-NEXT: move $a5, $zero +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB36_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fadd_monotonic: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -64 +; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 8 +; LA64D-NEXT: addi.d $s2, $sp, 0 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB36_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 0 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $zero +; LA64D-NEXT: move $a5, $zero +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 8 +; LA64D-NEXT: beqz $a0, .LBB36_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 64 +; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, double 1.0 monotonic, align 4 + ret double %v +} + +define double @double_fsub_monotonic(ptr %p) nounwind { +; LA64F-LABEL: double_fsub_monotonic: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -64 +; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 
24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, -1025 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 8 +; LA64F-NEXT: addi.d $s3, $sp, 0 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB37_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $zero +; LA64F-NEXT: move $a5, $zero +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB37_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fsub_monotonic: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -64 +; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI37_0) +; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI37_0) +; LA64D-NEXT: fld.d $fs0, $a0, 0 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 8 +; LA64D-NEXT: addi.d $s2, $sp, 0 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB37_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 0 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $zero +; LA64D-NEXT: move $a5, $zero +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 8 +; LA64D-NEXT: beqz $a0, .LBB37_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 64 +; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, double 1.0 monotonic, align 4 + ret double %v +} + +define double @double_fmin_monotonic(ptr %p) nounwind { +; LA64F-LABEL: double_fmin_monotonic: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -64 +; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; 
LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 8 +; LA64F-NEXT: addi.d $s3, $sp, 0 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB38_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(fmin) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $zero +; LA64F-NEXT: move $a5, $zero +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB38_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fmin_monotonic: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -64 +; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 8 +; LA64D-NEXT: addi.d $s2, $sp, 0 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB38_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 0 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $zero +; LA64D-NEXT: move $a5, $zero +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 8 +; LA64D-NEXT: beqz $a0, .LBB38_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 64 +; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, double 1.0 monotonic, align 4 + ret double %v +} + +define double @double_fmax_monotonic(ptr %p) nounwind { +; LA64F-LABEL: double_fmax_monotonic: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -64 +; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: lu52i.d 
$s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 8 +; LA64F-NEXT: addi.d $s3, $sp, 0 +; LA64F-NEXT: .p2align 4, , 16 +; LA64F-NEXT: .LBB39_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a1, $s0 +; LA64F-NEXT: bl %plt(fmax) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s1 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $zero +; LA64F-NEXT: move $a5, $zero +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB39_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fmax_monotonic: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -64 +; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 8 +; LA64D-NEXT: addi.d $s2, $sp, 0 +; LA64D-NEXT: .p2align 4, , 16 +; LA64D-NEXT: .LBB39_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 0 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $zero +; LA64D-NEXT: move $a5, $zero +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 8 +; LA64D-NEXT: beqz $a0, .LBB39_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 64 +; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, double 1.0 monotonic, align 4 + ret double %v +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll index cd4a9e7fa9c4fff87f3e171ac6bede25998a3100..770358a05bfd565c4dba0cbc47d00f7675d1a444 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll @@ -17,7 +17,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: and $a6, 
$a4, $a3 ; LA64-NEXT: move $a5, $a4 @@ -30,8 +29,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: sc.w $a5, $a2, 0 ; LA64-NEXT: beqz $a5, .LBB0_1 ; LA64-NEXT: # %bb.4: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: # %bb.5: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret %1 = atomicrmw umax ptr %a, i8 %b acquire @@ -52,7 +49,6 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: and $a6, $a4, $a3 ; LA64-NEXT: move $a5, $a4 @@ -65,8 +61,6 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: sc.w $a5, $a2, 0 ; LA64-NEXT: beqz $a5, .LBB1_1 ; LA64-NEXT: # %bb.4: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: # %bb.5: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret %1 = atomicrmw umax ptr %a, i16 %b acquire @@ -106,7 +100,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: and $a6, $a4, $a3 ; LA64-NEXT: move $a5, $a4 @@ -119,8 +112,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: sc.w $a5, $a2, 0 ; LA64-NEXT: beqz $a5, .LBB4_1 ; LA64-NEXT: # %bb.4: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: # %bb.5: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret %1 = atomicrmw umin ptr %a, i8 %b acquire @@ -141,7 +132,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: and $a6, $a4, $a3 ; LA64-NEXT: move $a5, $a4 @@ -154,8 +144,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: sc.w $a5, $a2, 0 ; LA64-NEXT: beqz $a5, .LBB5_1 ; LA64-NEXT: # %bb.4: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: # %bb.5: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret %1 = atomicrmw umin ptr %a, i16 %b acquire @@ -197,7 +185,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: andi $a4, $a0, 24 ; LA64-NEXT: xori $a4, $a4, 56 ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a5, $a2, 0 ; LA64-NEXT: and $a7, $a5, $a3 ; LA64-NEXT: move $a6, $a5 @@ -212,8 +199,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: sc.w $a6, $a2, 0 ; LA64-NEXT: beqz $a6, .LBB8_1 ; LA64-NEXT: # %bb.4: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: # %bb.5: ; LA64-NEXT: srl.w $a0, $a5, $a0 ; LA64-NEXT: ret %1 = atomicrmw max ptr %a, i8 %b acquire @@ -237,7 +222,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a5, $a2, 0 ; LA64-NEXT: and $a7, $a5, $a4 ; LA64-NEXT: move $a6, $a5 @@ -252,8 +236,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: sc.w $a6, $a2, 0 ; LA64-NEXT: beqz $a6, .LBB9_1 ; LA64-NEXT: # %bb.4: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: # %bb.5: ; LA64-NEXT: srl.w $a0, $a5, $a0 ; LA64-NEXT: ret %1 = atomicrmw max ptr %a, i16 %b acquire @@ -295,7 +277,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: andi $a4, 
$a0, 24 ; LA64-NEXT: xori $a4, $a4, 56 ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a5, $a2, 0 ; LA64-NEXT: and $a7, $a5, $a3 ; LA64-NEXT: move $a6, $a5 @@ -310,8 +291,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: sc.w $a6, $a2, 0 ; LA64-NEXT: beqz $a6, .LBB12_1 ; LA64-NEXT: # %bb.4: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: # %bb.5: ; LA64-NEXT: srl.w $a0, $a5, $a0 ; LA64-NEXT: ret %1 = atomicrmw min ptr %a, i8 %b acquire @@ -335,7 +314,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a5, $a2, 0 ; LA64-NEXT: and $a7, $a5, $a4 ; LA64-NEXT: move $a6, $a5 @@ -350,8 +328,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: sc.w $a6, $a2, 0 ; LA64-NEXT: beqz $a6, .LBB13_1 ; LA64-NEXT: # %bb.4: -; LA64-NEXT: dbar 1792 -; LA64-NEXT: # %bb.5: ; LA64-NEXT: srl.w $a0, $a5, $a0 ; LA64-NEXT: ret %1 = atomicrmw min ptr %a, i16 %b acquire @@ -377,3 +353,1403 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { %1 = atomicrmw min ptr %a, i64 %b acquire ret i64 %1 } + +define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a6, $a1, .LBB16_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB16_3: # in Loop: Header=BB16_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB16_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a6, $a1, .LBB17_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB17_3: # in Loop: Header=BB17_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB17_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i32_release: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 +; LA64-NEXT: 
move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i64_release: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.du $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a1, $a6, .LBB20_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB20_3: # in Loop: Header=BB20_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB20_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a1, $a6, .LBB21_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB21_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i32_release: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i64_release: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.du $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: ext.w.b $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: andi $a4, $a0, 24 +; LA64-NEXT: xori $a4, $a4, 56 +; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: 
Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a3 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a4 +; LA64-NEXT: sra.w $a7, $a7, $a4 +; LA64-NEXT: bge $a7, $a1, .LBB24_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a3 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB24_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a3, $a0, 24 +; LA64-NEXT: ori $a4, $zero, 48 +; LA64-NEXT: sub.d $a3, $a4, $a3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: ext.w.h $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a4 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a3 +; LA64-NEXT: sra.w $a7, $a7, $a3 +; LA64-NEXT: bge $a7, $a1, .LBB25_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a4 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB25_3: # in Loop: Header=BB25_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB25_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i32_release: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i64_release: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: ext.w.b $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: andi $a4, $a0, 24 +; LA64-NEXT: xori $a4, $a4, 56 +; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a3 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a4 +; LA64-NEXT: sra.w $a7, $a7, $a4 +; LA64-NEXT: bge $a1, $a7, .LBB28_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a3 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB28_3: # in Loop: Header=BB28_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB28_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i8 %b release + ret i8 %1 +} + +define i16 
@atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a3, $a0, 24 +; LA64-NEXT: ori $a4, $zero, 48 +; LA64-NEXT: sub.d $a3, $a4, $a3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: ext.w.h $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a4 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a3 +; LA64-NEXT: sra.w $a7, $a7, $a3 +; LA64-NEXT: bge $a1, $a7, .LBB29_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a4 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB29_3: # in Loop: Header=BB29_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB29_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i32_release: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i64_release: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a6, $a1, .LBB32_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB32_3: # in Loop: Header=BB32_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB32_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a6, $a1, .LBB33_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 
+; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB33_3: # in Loop: Header=BB33_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB33_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i32_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i64_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.du $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a1, $a6, .LBB36_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB36_3: # in Loop: Header=BB36_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB36_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a1, $a6, .LBB37_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB37_3: # in Loop: Header=BB37_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB37_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i32_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i64_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.du $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { 
+; LA64-LABEL: atomicrmw_max_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: ext.w.b $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: andi $a4, $a0, 24 +; LA64-NEXT: xori $a4, $a4, 56 +; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a3 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a4 +; LA64-NEXT: sra.w $a7, $a7, $a4 +; LA64-NEXT: bge $a7, $a1, .LBB40_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB40_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a3 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB40_3: # in Loop: Header=BB40_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB40_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a3, $a0, 24 +; LA64-NEXT: ori $a4, $zero, 48 +; LA64-NEXT: sub.d $a3, $a4, $a3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: ext.w.h $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a4 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a3 +; LA64-NEXT: sra.w $a7, $a7, $a3 +; LA64-NEXT: bge $a7, $a1, .LBB41_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB41_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a4 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB41_3: # in Loop: Header=BB41_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB41_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i32_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i64_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: ext.w.b $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: andi $a4, $a0, 24 +; LA64-NEXT: xori $a4, $a4, 56 +; LA64-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a3 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a4 +; LA64-NEXT: sra.w $a7, $a7, 
$a4 +; LA64-NEXT: bge $a1, $a7, .LBB44_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a3 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB44_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a3, $a0, 24 +; LA64-NEXT: ori $a4, $zero, 48 +; LA64-NEXT: sub.d $a3, $a4, $a3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: ext.w.h $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a4 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a3 +; LA64-NEXT: sra.w $a7, $a7, $a3 +; LA64-NEXT: bge $a1, $a7, .LBB45_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a4 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB45_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i32_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i64_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a6, $a1, .LBB48_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB48_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w 
$a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a6, $a1, .LBB49_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB49_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i32_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i64_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.du $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a1, $a6, .LBB52_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB52_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a1, $a6, .LBB53_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB53_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i32_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.wu 
$a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i64_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.du $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: ext.w.b $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: andi $a4, $a0, 24 +; LA64-NEXT: xori $a4, $a4, 56 +; LA64-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a3 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a4 +; LA64-NEXT: sra.w $a7, $a7, $a4 +; LA64-NEXT: bge $a7, $a1, .LBB56_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a3 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB56_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a3, $a0, 24 +; LA64-NEXT: ori $a4, $zero, 48 +; LA64-NEXT: sub.d $a3, $a4, $a3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: ext.w.h $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a4 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a3 +; LA64-NEXT: sra.w $a7, $a7, $a3 +; LA64-NEXT: bge $a7, $a1, .LBB57_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a4 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB57_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i32_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i64_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: 
ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: ext.w.b $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: andi $a4, $a0, 24 +; LA64-NEXT: xori $a4, $a4, 56 +; LA64-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a3 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a4 +; LA64-NEXT: sra.w $a7, $a7, $a4 +; LA64-NEXT: bge $a1, $a7, .LBB60_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a3 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB60_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a3, $a0, 24 +; LA64-NEXT: ori $a4, $zero, 48 +; LA64-NEXT: sub.d $a3, $a4, $a3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: ext.w.h $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a4 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a3 +; LA64-NEXT: sra.w $a7, $a7, $a3 +; LA64-NEXT: bge $a1, $a7, .LBB61_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a4 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB61_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i32_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i64_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a6, $a1, .LBB64_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: 
beqz $a5, .LBB64_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a6, $a1, .LBB65_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB65_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB65_3: # in Loop: Header=BB65_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB65_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.du $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a1, $a6, .LBB68_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB68_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB68_3: # in Loop: Header=BB68_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB68_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a1, $a6, .LBB69_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB69_1 Depth=1 +; 
LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB69_3: # in Loop: Header=BB69_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB69_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.du $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: ext.w.b $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: andi $a4, $a0, 24 +; LA64-NEXT: xori $a4, $a4, 56 +; LA64-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a3 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a4 +; LA64-NEXT: sra.w $a7, $a7, $a4 +; LA64-NEXT: bge $a7, $a1, .LBB72_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB72_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a3 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB72_3: # in Loop: Header=BB72_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB72_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a3, $a0, 24 +; LA64-NEXT: ori $a4, $zero, 48 +; LA64-NEXT: sub.d $a3, $a4, $a3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: ext.w.h $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a4 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a3 +; LA64-NEXT: sra.w $a7, $a7, $a3 +; LA64-NEXT: bge $a7, $a1, .LBB73_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB73_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a4 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB73_3: # in Loop: Header=BB73_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB73_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i32 %b 
monotonic + ret i32 %1 +} + +define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: ext.w.b $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: andi $a4, $a0, 24 +; LA64-NEXT: xori $a4, $a4, 56 +; LA64-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a3 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a4 +; LA64-NEXT: sra.w $a7, $a7, $a4 +; LA64-NEXT: bge $a1, $a7, .LBB76_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB76_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a3 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB76_3: # in Loop: Header=BB76_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB76_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a3, $a0, 24 +; LA64-NEXT: ori $a4, $zero, 48 +; LA64-NEXT: sub.d $a3, $a4, $a3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: ext.w.h $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a4 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a3 +; LA64-NEXT: sra.w $a7, $a7, $a3 +; LA64-NEXT: bge $a1, $a7, .LBB77_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB77_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a4 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB77_3: # in Loop: Header=BB77_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB77_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i64 %b monotonic + ret i64 %1 +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll index c077d14f728f7eeab81119b7dbcbc51f6208b5bb..94a26e4ed9c74c3e3ef30628065dac802f6bc548 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll +++ 
b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll @@ -13,7 +13,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: addi.w $a5, $a1, 0 ; LA32-NEXT: xor $a5, $a4, $a5 @@ -37,7 +36,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: addi.w $a5, $a1, 0 ; LA64-NEXT: xor $a5, $a4, $a5 @@ -64,7 +62,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: addi.w $a5, $a1, 0 ; LA32-NEXT: xor $a5, $a4, $a5 @@ -89,7 +86,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: addi.w $a5, $a1, 0 ; LA64-NEXT: xor $a5, $a4, $a5 @@ -108,7 +104,6 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_xchg_i32_acquire: ; LA32: # %bb.0: ; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: move $a3, $a1 ; LA32-NEXT: sc.w $a3, $a0, 0 @@ -157,7 +152,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: add.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 @@ -181,7 +175,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: add.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 @@ -208,7 +201,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: add.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 @@ -233,7 +225,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: add.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 @@ -252,7 +243,6 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_add_i32_acquire: ; LA32: # %bb.0: ; LA32-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: add.w $a3, $a2, $a1 ; LA32-NEXT: sc.w $a3, $a0, 0 @@ -301,7 +291,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: sub.w $a5, $a4, $a1 ; 
LA32-NEXT: xor $a5, $a4, $a5 @@ -325,7 +314,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: sub.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 @@ -352,7 +340,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: sub.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 @@ -377,7 +364,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: sub.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 @@ -396,7 +382,6 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_sub_i32_acquire: ; LA32: # %bb.0: ; LA32-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: sub.w $a3, $a2, $a1 ; LA32-NEXT: sc.w $a3, $a0, 0 @@ -447,7 +432,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: and $a5, $a4, $a1 ; LA32-NEXT: nor $a5, $a5, $zero @@ -472,7 +456,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: and $a5, $a4, $a1 ; LA64-NEXT: nor $a5, $a5, $zero @@ -500,7 +483,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: and $a5, $a4, $a1 ; LA32-NEXT: nor $a5, $a5, $zero @@ -526,7 +508,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: and $a5, $a4, $a1 ; LA64-NEXT: nor $a5, $a5, $zero @@ -546,7 +527,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_nand_i32_acquire: ; LA32: # %bb.0: ; LA32-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: and $a3, $a2, $a1 ; LA32-NEXT: nor $a3, $a3, $zero @@ -559,7 +539,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { ; LA64-LABEL: atomicrmw_nand_i32_acquire: ; LA64: # %bb.0: ; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.w $a2, $a0, 0 ; LA64-NEXT: and $a3, $a2, $a1 ; LA64-NEXT: nor $a3, $a3, $zero @@ -586,7 +565,6 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { ; LA64-LABEL: atomicrmw_nand_i64_acquire: ; LA64: # %bb.0: ; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: dbar 0 ; LA64-NEXT: ll.d $a2, $a0, 0 ; LA64-NEXT: and $a3, $a2, $a1 ; 
LA64-NEXT: nor $a3, $a3, $zero @@ -611,7 +589,6 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { ; LA32-NEXT: addi.w $a3, $zero, -4 ; LA32-NEXT: and $a0, $a0, $a3 ; LA32-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a3, $a0, 0 ; LA32-NEXT: and $a4, $a3, $a1 ; LA32-NEXT: sc.w $a4, $a0, 0 @@ -650,7 +627,6 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { ; LA32-NEXT: addi.w $a2, $zero, -4 ; LA32-NEXT: and $a0, $a0, $a2 ; LA32-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: and $a4, $a2, $a1 ; LA32-NEXT: sc.w $a4, $a0, 0 @@ -681,7 +657,6 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_and_i32_acquire: ; LA32: # %bb.0: ; LA32-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: and $a3, $a2, $a1 ; LA32-NEXT: sc.w $a3, $a0, 0 @@ -728,7 +703,6 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a3, $a2, 0 ; LA32-NEXT: or $a4, $a3, $a1 ; LA32-NEXT: sc.w $a4, $a2, 0 @@ -760,7 +734,6 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a3, $a2, 0 ; LA32-NEXT: or $a4, $a3, $a1 ; LA32-NEXT: sc.w $a4, $a2, 0 @@ -787,7 +760,6 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_or_i32_acquire: ; LA32: # %bb.0: ; LA32-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: or $a3, $a2, $a1 ; LA32-NEXT: sc.w $a3, $a0, 0 @@ -834,7 +806,6 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a3, $a2, 0 ; LA32-NEXT: xor $a4, $a3, $a1 ; LA32-NEXT: sc.w $a4, $a2, 0 @@ -866,7 +837,6 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a3, $a2, 0 ; LA32-NEXT: xor $a4, $a3, $a1 ; LA32-NEXT: sc.w $a4, $a2, 0 @@ -893,7 +863,6 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_xor_i32_acquire: ; LA32: # %bb.0: ; LA32-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: xor $a3, $a2, $a1 ; LA32-NEXT: sc.w $a3, $a0, 0 @@ -931,6 +900,3228 @@ define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind { ret i64 %1 } +define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; 
LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB28_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB28_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i8 %b release + ret i8 %1 +} + +define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_0_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a1, $zero, -4 +; LA32-NEXT: and $a1, $a0, $a1 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a2, $zero, 255 +; LA32-NEXT: sll.w $a2, $a2, $a0 +; LA32-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: addi.w $a4, $zero, 0 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: and $a4, $a4, $a2 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beqz $a4, .LBB29_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_0_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $zero, -4 +; LA64-NEXT: and $a1, $a0, $a1 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a2, $zero, 255 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a1, 0 +; LA64-NEXT: addi.w $a4, $zero, 0 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: and $a4, $a4, $a2 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: sc.w $a4, $a1, 0 +; LA64-NEXT: beqz $a4, .LBB29_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i8 0 release + ret i8 %1 +} + +define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_minus_1_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a1, $zero, -4 +; LA32-NEXT: and $a1, $a0, $a1 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a2, $zero, 255 +; LA32-NEXT: sll.w $a2, $a2, $a0 +; LA32-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: addi.w $a4, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: and $a4, $a4, $a2 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beqz $a4, .LBB30_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_minus_1_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $zero, -4 +; LA64-NEXT: and $a1, $a0, $a1 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a2, $zero, 255 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a1, 0 +; LA64-NEXT: addi.w $a4, $a2, 0 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: and $a4, $a4, $a2 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: sc.w $a4, $a1, 0 +; LA64-NEXT: beqz $a4, .LBB30_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w 
$a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i8 -1 release + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB31_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB31_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i16 %b release + ret i16 %1 +} + +define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_0_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a1, $zero, -4 +; LA32-NEXT: and $a1, $a0, $a1 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: sll.w $a2, $a2, $a0 +; LA32-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: addi.w $a4, $zero, 0 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: and $a4, $a4, $a2 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beqz $a4, .LBB32_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_0_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $zero, -4 +; LA64-NEXT: and $a1, $a0, $a1 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a1, 0 +; LA64-NEXT: addi.w $a4, $zero, 0 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: and $a4, $a4, $a2 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: sc.w $a4, $a1, 0 +; LA64-NEXT: beqz $a4, .LBB32_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i16 0 release + ret i16 %1 +} + +define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_minus_1_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a1, $zero, -4 +; LA32-NEXT: and $a1, $a0, $a1 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: sll.w $a2, $a2, $a0 +; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: addi.w $a4, $a2, 0 +; LA32-NEXT: 
xor $a4, $a3, $a4 +; LA32-NEXT: and $a4, $a4, $a2 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beqz $a4, .LBB33_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_minus_1_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $zero, -4 +; LA64-NEXT: and $a1, $a0, $a1 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a1, 0 +; LA64-NEXT: addi.w $a4, $a2, 0 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: and $a4, $a4, $a2 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: sc.w $a4, $a1, 0 +; LA64-NEXT: beqz $a4, .LBB33_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i16 -1 release + ret i16 %1 +} + +define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i32_release: +; LA32: # %bb.0: +; LA32-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: move $a3, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB34_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i32_release: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i64_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 3 +; LA32-NEXT: bl %plt(__atomic_exchange_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i64_release: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB36_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB36_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: 
srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB37_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB37_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i32_release: +; LA32: # %bb.0: +; LA32-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: add.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB38_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i32_release: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i64_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 3 +; LA32-NEXT: bl %plt(__atomic_fetch_add_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i64_release: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB40_1 +; LA32-NEXT: # %bb.2: +; 
LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB40_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB41_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB41_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i32_release: +; LA32: # %bb.0: +; LA32-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: sub.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB42_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i32_release: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.w $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i64_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 3 +; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i64_release: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a2, $zero, $a1 +; 
LA64-NEXT: amadd_db.d $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: nor $a5, $a5, $zero +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB44_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, $a1 +; LA64-NEXT: nor $a5, $a5, $zero +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB44_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: nor $a5, $a5, $zero +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB45_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, $a1 +; LA64-NEXT: nor $a5, $a5, $zero +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB45_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i32_release: +; LA32: # %bb.0: +; LA32-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 
+; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a3, $a2, $a1 +; LA32-NEXT: nor $a3, $a3, $zero +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB46_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i32_release: +; LA64: # %bb.0: +; LA64-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a2, $a0, 0 +; LA64-NEXT: and $a3, $a2, $a1 +; LA64-NEXT: nor $a3, $a3, $zero +; LA64-NEXT: sc.w $a3, $a0, 0 +; LA64-NEXT: beqz $a3, .LBB46_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i64_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 3 +; LA32-NEXT: bl %plt(__atomic_fetch_nand_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i64_release: +; LA64: # %bb.0: +; LA64-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $a2, $a0, 0 +; LA64-NEXT: and $a3, $a2, $a1 +; LA64-NEXT: nor $a3, $a3, $zero +; LA64-NEXT: sc.d $a3, $a0, 0 +; LA64-NEXT: beqz $a3, .LBB47_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: orn $a1, $a1, $a3 +; LA32-NEXT: addi.w $a3, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a3 +; LA32-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: and $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB48_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: orn $a1, $a1, $a3 +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: amand_db.w $a3, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: orn $a1, $a1, $a2 +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a2 +; LA32-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a4, $a2, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB49_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a2, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, 
$a3 +; LA64-NEXT: orn $a1, $a1, $a2 +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a2 +; LA64-NEXT: amand_db.w $a2, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a2, $a3 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i32_release: +; LA32: # %bb.0: +; LA32-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB50_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i32_release: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i64_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 3 +; LA32-NEXT: bl %plt(__atomic_fetch_and_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i64_release: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB52_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB53_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i32_release: +; LA32: # %bb.0: +; LA32-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; 
LA32-NEXT: or $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB54_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i32_release: +; LA64: # %bb.0: +; LA64-NEXT: amor_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i64_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 3 +; LA32-NEXT: bl %plt(__atomic_fetch_or_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i64_release: +; LA64: # %bb.0: +; LA64-NEXT: amor_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB56_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB57_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i32_release: +; LA32: # %bb.0: +; LA32-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: xor $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB58_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i32_release: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind { +; LA32-LABEL: 
atomicrmw_xor_i64_release: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 3 +; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i64_release: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB60_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB60_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_0_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a1, $zero, -4 +; LA32-NEXT: and $a1, $a0, $a1 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a2, $zero, 255 +; LA32-NEXT: sll.w $a2, $a2, $a0 +; LA32-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: addi.w $a4, $zero, 0 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: and $a4, $a4, $a2 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beqz $a4, .LBB61_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_0_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $zero, -4 +; LA64-NEXT: and $a1, $a0, $a1 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a2, $zero, 255 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a1, 0 +; LA64-NEXT: addi.w $a4, $zero, 0 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: and $a4, $a4, $a2 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: sc.w $a4, $a1, 0 +; LA64-NEXT: beqz $a4, .LBB61_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i8 0 acq_rel + ret i8 %1 +} + +define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a1, $zero, -4 +; LA32-NEXT: 
and $a1, $a0, $a1 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a2, $zero, 255 +; LA32-NEXT: sll.w $a2, $a2, $a0 +; LA32-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: addi.w $a4, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: and $a4, $a4, $a2 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beqz $a4, .LBB62_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $zero, -4 +; LA64-NEXT: and $a1, $a0, $a1 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a2, $zero, 255 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a1, 0 +; LA64-NEXT: addi.w $a4, $a2, 0 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: and $a4, $a4, $a2 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: sc.w $a4, $a1, 0 +; LA64-NEXT: beqz $a4, .LBB62_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i8 -1 acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB63_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB63_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_0_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a1, $zero, -4 +; LA32-NEXT: and $a1, $a0, $a1 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: sll.w $a2, $a2, $a0 +; LA32-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: addi.w $a4, $zero, 0 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: and $a4, $a4, $a2 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beqz $a4, .LBB64_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_0_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $zero, -4 +; LA64-NEXT: 
and $a1, $a0, $a1 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a1, 0 +; LA64-NEXT: addi.w $a4, $zero, 0 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: and $a4, $a4, $a2 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: sc.w $a4, $a1, 0 +; LA64-NEXT: beqz $a4, .LBB64_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i16 0 acq_rel + ret i16 %1 +} + +define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a1, $zero, -4 +; LA32-NEXT: and $a1, $a0, $a1 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: sll.w $a2, $a2, $a0 +; LA32-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: addi.w $a4, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: and $a4, $a4, $a2 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beqz $a4, .LBB65_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $zero, -4 +; LA64-NEXT: and $a1, $a0, $a1 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a1, 0 +; LA64-NEXT: addi.w $a4, $a2, 0 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: and $a4, $a4, $a2 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: sc.w $a4, $a1, 0 +; LA64-NEXT: beqz $a4, .LBB65_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i16 -1 acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i32_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: move $a3, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB66_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i32_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i64_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 4 +; LA32-NEXT: bl %plt(__atomic_exchange_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i64_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; 
LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB68_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB68_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB69_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB69_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i32_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: add.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB70_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i32_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i64_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte 
Folded Spill +; LA32-NEXT: ori $a3, $zero, 4 +; LA32-NEXT: bl %plt(__atomic_fetch_add_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i64_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB72_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB72_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB73_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB73_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind 
{ +; LA32-LABEL: atomicrmw_sub_i32_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: sub.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB74_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i32_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.w $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i64_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 4 +; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i64_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a2, $zero, $a1 +; LA64-NEXT: amadd_db.d $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: nor $a5, $a5, $zero +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB76_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, $a1 +; LA64-NEXT: nor $a5, $a5, $zero +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB76_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: nor $a5, $a5, $zero +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB77_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 
+; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, $a1 +; LA64-NEXT: nor $a5, $a5, $zero +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB77_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i32_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a3, $a2, $a1 +; LA32-NEXT: nor $a3, $a3, $zero +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB78_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i32_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a2, $a0, 0 +; LA64-NEXT: and $a3, $a2, $a1 +; LA64-NEXT: nor $a3, $a3, $zero +; LA64-NEXT: sc.w $a3, $a0, 0 +; LA64-NEXT: beqz $a3, .LBB78_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i64_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 4 +; LA32-NEXT: bl %plt(__atomic_fetch_nand_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i64_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $a2, $a0, 0 +; LA64-NEXT: and $a3, $a2, $a1 +; LA64-NEXT: nor $a3, $a3, $zero +; LA64-NEXT: sc.d $a3, $a0, 0 +; LA64-NEXT: beqz $a3, .LBB79_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: orn $a1, $a1, $a3 +; LA32-NEXT: addi.w $a3, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a3 +; LA32-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: and $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB80_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: orn $a1, $a1, $a3 +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: amand_db.w $a3, $a1, $a0 +; 
LA64-NEXT: srl.w $a0, $a3, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: orn $a1, $a1, $a2 +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a2 +; LA32-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a4, $a2, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB81_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a2, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a3 +; LA64-NEXT: orn $a1, $a1, $a2 +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a2 +; LA64-NEXT: amand_db.w $a2, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a2, $a3 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i32_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB82_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i32_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i64_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 4 +; LA32-NEXT: bl %plt(__atomic_fetch_and_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i64_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB84_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA32-LABEL: 
atomicrmw_or_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB85_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i32_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: or $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB86_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i32_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amor_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i64_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 4 +; LA32-NEXT: bl %plt(__atomic_fetch_or_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i64_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amor_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB88_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB89_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret 
+; +; LA64-LABEL: atomicrmw_xor_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i32_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: xor $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB90_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i32_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i64_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 4 +; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i64_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB92_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB92_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_0_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a1, $zero, -4 +; LA32-NEXT: and $a1, $a0, $a1 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a2, $zero, 255 +; LA32-NEXT: sll.w $a2, $a2, $a0 +; LA32-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: addi.w $a4, $zero, 0 +; LA32-NEXT: xor $a4, $a3, $a4 +; 
LA32-NEXT: and $a4, $a4, $a2 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beqz $a4, .LBB93_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_0_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $zero, -4 +; LA64-NEXT: and $a1, $a0, $a1 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a2, $zero, 255 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a1, 0 +; LA64-NEXT: addi.w $a4, $zero, 0 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: and $a4, $a4, $a2 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: sc.w $a4, $a1, 0 +; LA64-NEXT: beqz $a4, .LBB93_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i8 0 seq_cst + ret i8 %1 +} + +define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a1, $zero, -4 +; LA32-NEXT: and $a1, $a0, $a1 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a2, $zero, 255 +; LA32-NEXT: sll.w $a2, $a2, $a0 +; LA32-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: addi.w $a4, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: and $a4, $a4, $a2 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beqz $a4, .LBB94_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $zero, -4 +; LA64-NEXT: and $a1, $a0, $a1 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a2, $zero, 255 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a1, 0 +; LA64-NEXT: addi.w $a4, $a2, 0 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: and $a4, $a4, $a2 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: sc.w $a4, $a1, 0 +; LA64-NEXT: beqz $a4, .LBB94_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i8 -1 seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB95_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, 
$a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB95_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_0_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a1, $zero, -4 +; LA32-NEXT: and $a1, $a0, $a1 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: sll.w $a2, $a2, $a0 +; LA32-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: addi.w $a4, $zero, 0 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: and $a4, $a4, $a2 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beqz $a4, .LBB96_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_0_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $zero, -4 +; LA64-NEXT: and $a1, $a0, $a1 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a1, 0 +; LA64-NEXT: addi.w $a4, $zero, 0 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: and $a4, $a4, $a2 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: sc.w $a4, $a1, 0 +; LA64-NEXT: beqz $a4, .LBB96_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i16 0 seq_cst + ret i16 %1 +} + +define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a1, $zero, -4 +; LA32-NEXT: and $a1, $a0, $a1 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: sll.w $a2, $a2, $a0 +; LA32-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: addi.w $a4, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: and $a4, $a4, $a2 +; LA32-NEXT: xor $a4, $a3, $a4 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beqz $a4, .LBB97_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $zero, -4 +; LA64-NEXT: and $a1, $a0, $a1 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a1, 0 +; LA64-NEXT: addi.w $a4, $a2, 0 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: and $a4, $a4, $a2 +; LA64-NEXT: xor $a4, $a3, $a4 +; LA64-NEXT: sc.w $a4, $a1, 0 +; LA64-NEXT: beqz $a4, .LBB97_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i16 -1 seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i32_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: move $a3, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB98_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i32_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: 
amswap_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i64_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 5 +; LA32-NEXT: bl %plt(__atomic_exchange_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i64_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB100_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB100_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB101_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; 
LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB101_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i32_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: add.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB102_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i32_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i64_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 5 +; LA32-NEXT: bl %plt(__atomic_fetch_add_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i64_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB104_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB104_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, 
$a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB105_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB105_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i32_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: sub.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB106_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i32_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.w $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i64_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 5 +; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i64_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a2, $zero, $a1 +; LA64-NEXT: amadd_db.d $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: nor $a5, $a5, $zero +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB108_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, 
$a1 +; LA64-NEXT: nor $a5, $a5, $zero +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB108_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: nor $a5, $a5, $zero +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB109_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, $a1 +; LA64-NEXT: nor $a5, $a5, $zero +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB109_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i32_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a3, $a2, $a1 +; LA32-NEXT: nor $a3, $a3, $zero +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB110_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i32_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a2, $a0, 0 +; LA64-NEXT: and $a3, $a2, $a1 +; LA64-NEXT: nor $a3, $a3, $zero +; LA64-NEXT: sc.w $a3, $a0, 0 +; LA64-NEXT: beqz $a3, .LBB110_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i64_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 5 +; LA32-NEXT: bl %plt(__atomic_fetch_nand_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i64_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $a2, $a0, 0 +; LA64-NEXT: and $a3, $a2, $a1 +; LA64-NEXT: nor $a3, $a3, $zero +; LA64-NEXT: sc.d $a3, $a0, 0 +; LA64-NEXT: beqz $a3, .LBB111_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: 
move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: orn $a1, $a1, $a3 +; LA32-NEXT: addi.w $a3, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a3 +; LA32-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: and $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB112_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: orn $a1, $a1, $a3 +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: amand_db.w $a3, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: orn $a1, $a1, $a2 +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a2 +; LA32-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a4, $a2, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB113_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a2, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a3 +; LA64-NEXT: orn $a1, $a1, $a2 +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a2 +; LA64-NEXT: amand_db.w $a2, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a2, $a3 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i32_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB114_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i32_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i64_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 5 +; LA32-NEXT: bl %plt(__atomic_fetch_and_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i64_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.d $a2, $a1, $a0 +; LA64-NEXT: 
move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB116_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB117_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i32_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: or $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB118_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i32_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: amor_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i64_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 5 +; LA32-NEXT: bl %plt(__atomic_fetch_or_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i64_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: amor_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; 
LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB120_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB121_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i32_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: xor $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB122_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i32_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i64_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 5 +; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i64_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i64 %b seq_cst + ret i64 %1 +} + define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-LABEL: atomicrmw_xchg_i8_monotonic: ; LA32: # %bb.0: @@ -941,14 +4132,14 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-NEXT: sll.w $a3, $a3, $a0 ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a0 -; LA32-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: addi.w $a5, $a1, 0 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: sc.w $a5, $a2, 0 -; LA32-NEXT: beqz $a5, .LBB28_1 +; LA32-NEXT: beqz $a5, .LBB124_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a4, $a0 ; LA32-NEXT: ret @@ -964,14 +4155,14 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { 
; LA64-NEXT: andi $a1, $a1, 255 ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: addi.w $a5, $a1, 0 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: sc.w $a5, $a2, 0 -; LA64-NEXT: beqz $a5, .LBB28_1 +; LA64-NEXT: beqz $a5, .LBB124_1 ; LA64-NEXT: # %bb.2: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret @@ -990,14 +4181,14 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-NEXT: sll.w $a3, $a3, $a0 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a0 -; LA32-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: addi.w $a5, $a1, 0 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: sc.w $a5, $a2, 0 -; LA32-NEXT: beqz $a5, .LBB29_1 +; LA32-NEXT: beqz $a5, .LBB125_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a4, $a0 ; LA32-NEXT: ret @@ -1014,14 +4205,14 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: addi.w $a5, $a1, 0 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: sc.w $a5, $a2, 0 -; LA64-NEXT: beqz $a5, .LBB29_1 +; LA64-NEXT: beqz $a5, .LBB125_1 ; LA64-NEXT: # %bb.2: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret @@ -1032,11 +4223,11 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_xchg_i32_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: move $a3, $a1 ; LA32-NEXT: sc.w $a3, $a0, 0 -; LA32-NEXT: beqz $a3, .LBB30_1 +; LA32-NEXT: beqz $a3, .LBB126_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: move $a0, $a2 ; LA32-NEXT: ret @@ -1080,14 +4271,14 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-NEXT: sll.w $a3, $a3, $a0 ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a0 -; LA32-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: add.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: sc.w $a5, $a2, 0 -; LA32-NEXT: beqz $a5, .LBB32_1 +; LA32-NEXT: beqz $a5, .LBB128_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a4, $a0 ; LA32-NEXT: ret @@ -1103,14 +4294,14 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA64-NEXT: andi $a1, $a1, 255 ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: add.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: sc.w $a5, $a2, 0 -; LA64-NEXT: beqz $a5, .LBB32_1 +; 
LA64-NEXT: beqz $a5, .LBB128_1 ; LA64-NEXT: # %bb.2: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret @@ -1129,14 +4320,14 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-NEXT: sll.w $a3, $a3, $a0 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a0 -; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: add.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: sc.w $a5, $a2, 0 -; LA32-NEXT: beqz $a5, .LBB33_1 +; LA32-NEXT: beqz $a5, .LBB129_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a4, $a0 ; LA32-NEXT: ret @@ -1153,14 +4344,14 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: add.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: sc.w $a5, $a2, 0 -; LA64-NEXT: beqz $a5, .LBB33_1 +; LA64-NEXT: beqz $a5, .LBB129_1 ; LA64-NEXT: # %bb.2: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret @@ -1171,11 +4362,11 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_add_i32_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: add.w $a3, $a2, $a1 ; LA32-NEXT: sc.w $a3, $a0, 0 -; LA32-NEXT: beqz $a3, .LBB34_1 +; LA32-NEXT: beqz $a3, .LBB130_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: move $a0, $a2 ; LA32-NEXT: ret @@ -1219,14 +4410,14 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-NEXT: sll.w $a3, $a3, $a0 ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a0 -; LA32-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: sub.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: sc.w $a5, $a2, 0 -; LA32-NEXT: beqz $a5, .LBB36_1 +; LA32-NEXT: beqz $a5, .LBB132_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a4, $a0 ; LA32-NEXT: ret @@ -1242,14 +4433,14 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA64-NEXT: andi $a1, $a1, 255 ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: sub.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: sc.w $a5, $a2, 0 -; LA64-NEXT: beqz $a5, .LBB36_1 +; LA64-NEXT: beqz $a5, .LBB132_1 ; LA64-NEXT: # %bb.2: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret @@ -1268,14 +4459,14 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-NEXT: sll.w $a3, $a3, $a0 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a0 -; LA32-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w 
$a4, $a2, 0 ; LA32-NEXT: sub.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: sc.w $a5, $a2, 0 -; LA32-NEXT: beqz $a5, .LBB37_1 +; LA32-NEXT: beqz $a5, .LBB133_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a4, $a0 ; LA32-NEXT: ret @@ -1292,14 +4483,14 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: sub.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: sc.w $a5, $a2, 0 -; LA64-NEXT: beqz $a5, .LBB37_1 +; LA64-NEXT: beqz $a5, .LBB133_1 ; LA64-NEXT: # %bb.2: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret @@ -1310,11 +4501,11 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_sub_i32_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB134_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: sub.w $a3, $a2, $a1 ; LA32-NEXT: sc.w $a3, $a0, 0 -; LA32-NEXT: beqz $a3, .LBB38_1 +; LA32-NEXT: beqz $a3, .LBB134_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: move $a0, $a2 ; LA32-NEXT: ret @@ -1360,7 +4551,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-NEXT: sll.w $a3, $a3, $a0 ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a0 -; LA32-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: and $a5, $a4, $a1 ; LA32-NEXT: nor $a5, $a5, $zero @@ -1368,7 +4559,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: sc.w $a5, $a2, 0 -; LA32-NEXT: beqz $a5, .LBB40_1 +; LA32-NEXT: beqz $a5, .LBB136_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a4, $a0 ; LA32-NEXT: ret @@ -1384,7 +4575,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA64-NEXT: andi $a1, $a1, 255 ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: and $a5, $a4, $a1 ; LA64-NEXT: nor $a5, $a5, $zero @@ -1392,7 +4583,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: sc.w $a5, $a2, 0 -; LA64-NEXT: beqz $a5, .LBB40_1 +; LA64-NEXT: beqz $a5, .LBB136_1 ; LA64-NEXT: # %bb.2: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret @@ -1411,7 +4602,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-NEXT: sll.w $a3, $a3, $a0 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a0 -; LA32-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a4, $a2, 0 ; LA32-NEXT: and $a5, $a4, $a1 ; LA32-NEXT: nor $a5, $a5, $zero @@ -1419,7 +4610,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: sc.w $a5, $a2, 0 -; 
LA32-NEXT: beqz $a5, .LBB41_1 +; LA32-NEXT: beqz $a5, .LBB137_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a4, $a0 ; LA32-NEXT: ret @@ -1436,7 +4627,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ; LA64-NEXT: sll.w $a1, $a1, $a0 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a4, $a2, 0 ; LA64-NEXT: and $a5, $a4, $a1 ; LA64-NEXT: nor $a5, $a5, $zero @@ -1444,7 +4635,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: sc.w $a5, $a2, 0 -; LA64-NEXT: beqz $a5, .LBB41_1 +; LA64-NEXT: beqz $a5, .LBB137_1 ; LA64-NEXT: # %bb.2: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret @@ -1455,24 +4646,24 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_nand_i32_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: and $a3, $a2, $a1 ; LA32-NEXT: nor $a3, $a3, $zero ; LA32-NEXT: sc.w $a3, $a0, 0 -; LA32-NEXT: beqz $a3, .LBB42_1 +; LA32-NEXT: beqz $a3, .LBB138_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: move $a0, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_nand_i32_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a2, $a0, 0 ; LA64-NEXT: and $a3, $a2, $a1 ; LA64-NEXT: nor $a3, $a3, $zero ; LA64-NEXT: sc.w $a3, $a0, 0 -; LA64-NEXT: beqz $a3, .LBB42_1 +; LA64-NEXT: beqz $a3, .LBB138_1 ; LA64-NEXT: # %bb.2: ; LA64-NEXT: move $a0, $a2 ; LA64-NEXT: ret @@ -1493,12 +4684,12 @@ define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ; ; LA64-LABEL: atomicrmw_nand_i64_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB139_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.d $a2, $a0, 0 ; LA64-NEXT: and $a3, $a2, $a1 ; LA64-NEXT: nor $a3, $a3, $zero ; LA64-NEXT: sc.d $a3, $a0, 0 -; LA64-NEXT: beqz $a3, .LBB43_1 +; LA64-NEXT: beqz $a3, .LBB139_1 ; LA64-NEXT: # %bb.2: ; LA64-NEXT: move $a0, $a2 ; LA64-NEXT: ret @@ -1517,11 +4708,11 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-NEXT: orn $a1, $a1, $a3 ; LA32-NEXT: addi.w $a3, $zero, -4 ; LA32-NEXT: and $a0, $a0, $a3 -; LA32-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a3, $a0, 0 ; LA32-NEXT: and $a4, $a3, $a1 ; LA32-NEXT: sc.w $a4, $a0, 0 -; LA32-NEXT: beqz $a4, .LBB44_1 +; LA32-NEXT: beqz $a4, .LBB140_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a3, $a2 ; LA32-NEXT: ret @@ -1555,11 +4746,11 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-NEXT: orn $a1, $a1, $a2 ; LA32-NEXT: addi.w $a2, $zero, -4 ; LA32-NEXT: and $a0, $a0, $a2 -; LA32-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: and $a4, $a2, $a1 ; LA32-NEXT: sc.w $a4, $a0, 0 -; LA32-NEXT: beqz $a4, .LBB45_1 +; LA32-NEXT: beqz $a4, .LBB141_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a2, $a3 ; LA32-NEXT: ret @@ -1585,11 +4776,11 @@ define i16 
@atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_and_i32_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB142_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: and $a3, $a2, $a1 ; LA32-NEXT: sc.w $a3, $a0, 0 -; LA32-NEXT: beqz $a3, .LBB46_1 +; LA32-NEXT: beqz $a3, .LBB142_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: move $a0, $a2 ; LA32-NEXT: ret @@ -1631,11 +4822,11 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-NEXT: slli.w $a0, $a0, 3 ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a0 -; LA32-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a3, $a2, 0 ; LA32-NEXT: or $a4, $a3, $a1 ; LA32-NEXT: sc.w $a4, $a2, 0 -; LA32-NEXT: beqz $a4, .LBB48_1 +; LA32-NEXT: beqz $a4, .LBB144_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a3, $a0 ; LA32-NEXT: ret @@ -1662,11 +4853,11 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-NEXT: slli.w $a0, $a0, 3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a0 -; LA32-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a3, $a2, 0 ; LA32-NEXT: or $a4, $a3, $a1 ; LA32-NEXT: sc.w $a4, $a2, 0 -; LA32-NEXT: beqz $a4, .LBB49_1 +; LA32-NEXT: beqz $a4, .LBB145_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a3, $a0 ; LA32-NEXT: ret @@ -1688,11 +4879,11 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_or_i32_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: or $a3, $a2, $a1 ; LA32-NEXT: sc.w $a3, $a0, 0 -; LA32-NEXT: beqz $a3, .LBB50_1 +; LA32-NEXT: beqz $a3, .LBB146_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: move $a0, $a2 ; LA32-NEXT: ret @@ -1734,11 +4925,11 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-NEXT: slli.w $a0, $a0, 3 ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a0 -; LA32-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a3, $a2, 0 ; LA32-NEXT: xor $a4, $a3, $a1 ; LA32-NEXT: sc.w $a4, $a2, 0 -; LA32-NEXT: beqz $a4, .LBB52_1 +; LA32-NEXT: beqz $a4, .LBB148_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a3, $a0 ; LA32-NEXT: ret @@ -1765,11 +4956,11 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-NEXT: slli.w $a0, $a0, 3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a0 -; LA32-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a3, $a2, 0 ; LA32-NEXT: xor $a4, $a3, $a1 ; LA32-NEXT: sc.w $a4, $a2, 0 -; LA32-NEXT: beqz $a4, .LBB53_1 +; LA32-NEXT: beqz $a4, .LBB149_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a3, $a0 ; LA32-NEXT: ret @@ -1791,11 +4982,11 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ; LA32-LABEL: atomicrmw_xor_i32_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: .LBB150_1: # 
=>This Inner Loop Header: Depth=1
; LA32-NEXT: ll.w $a2, $a0, 0
; LA32-NEXT: xor $a3, $a2, $a1
; LA32-NEXT: sc.w $a3, $a0, 0
-; LA32-NEXT: beqz $a3, .LBB54_1
+; LA32-NEXT: beqz $a3, .LBB150_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a8b164a4cd3cf14a3777317fd311eaf14622c7bc
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
+
+define void @fence_singlethread() {
+; LA32-LABEL: fence_singlethread:
+; LA32: # %bb.0:
+; LA32-NEXT: #MEMBARRIER
+; LA32-NEXT: ret
+;
+; LA64-LABEL: fence_singlethread:
+; LA64: # %bb.0:
+; LA64-NEXT: #MEMBARRIER
+; LA64-NEXT: ret
+ fence syncscope("singlethread") seq_cst
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll
index 724639f3c6fb9f02a7cf5ffeb0c057ee2ad94519..c5b2232f9b80302e7549d868bd1840f8cd8b19ed 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll
@@ -5,12 +5,12 @@ define void @fence_acquire() nounwind {
; LA32-LABEL: fence_acquire:
; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 20
; LA32-NEXT: ret
;
; LA64-LABEL: fence_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 20
; LA64-NEXT: ret
fence acquire
ret void
@@ -19,12 +19,12 @@ define void @fence_release() nounwind {
; LA32-LABEL: fence_release:
; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 18
; LA32-NEXT: ret
;
; LA64-LABEL: fence_release:
; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 18
; LA64-NEXT: ret
fence release
ret void
@@ -33,12 +33,12 @@ define void @fence_acq_rel() nounwind {
; LA32-LABEL: fence_acq_rel:
; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 16
; LA32-NEXT: ret
;
; LA64-LABEL: fence_acq_rel:
; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 16
; LA64-NEXT: ret
fence acq_rel
ret void
@@ -47,12 +47,12 @@ define void @fence_seq_cst() nounwind {
; LA32-LABEL: fence_seq_cst:
; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 16
; LA32-NEXT: ret
;
; LA64-LABEL: fence_seq_cst:
; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 16
; LA64-NEXT: ret
fence seq_cst
ret void
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
index e91d0c145eab6e176081cd536cb65bb01720ba60..8b170c479eed696b6e78e021a173d45f8b922237 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
@@ -6,13 +6,13 @@ define i8 @load_acquire_i8(ptr %ptr) {
; LA32-LABEL: load_acquire_i8:
; LA32: # %bb.0:
; LA32-NEXT: ld.b $a0, $a0, 0
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 20
; LA32-NEXT: ret
;
; LA64-LABEL: load_acquire_i8:
; LA64: # %bb.0:
; LA64-NEXT: ld.b $a0, $a0, 0
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 20
; LA64-NEXT: ret
%val = load atomic i8, ptr %ptr acquire, align 1
ret i8 %val
@@ -22,13 +22,13 @@ define i16 @load_acquire_i16(ptr %ptr) {
; LA32-LABEL: load_acquire_i16:
; LA32: # %bb.0:
; LA32-NEXT: ld.h $a0, $a0, 0
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 20
; LA32-NEXT: ret
;
; LA64-LABEL: load_acquire_i16:
; LA64: # %bb.0:
; LA64-NEXT: ld.h $a0, $a0, 0
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 20
; LA64-NEXT: ret
%val = load atomic i16, ptr %ptr acquire, align 2
ret i16 %val
@@ -38,13 +38,13 @@ define i32 @load_acquire_i32(ptr %ptr) {
; LA32-LABEL: load_acquire_i32:
; LA32: # %bb.0:
; LA32-NEXT: ld.w $a0, $a0, 0
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 20
; LA32-NEXT: ret
;
; LA64-LABEL: load_acquire_i32:
; LA64: # %bb.0:
; LA64-NEXT: ld.w $a0, $a0, 0
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 20
; LA64-NEXT: ret
%val = load atomic i32, ptr %ptr acquire, align 4
ret i32 %val
@@ -66,22 +66,218 @@ define i64 @load_acquire_i64(ptr %ptr) {
; LA64-LABEL: load_acquire_i64:
; LA64: # %bb.0:
; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 20
; LA64-NEXT: ret
%val = load atomic i64, ptr %ptr acquire, align 8
ret i64 %val
}
+define i8 @load_unordered_i8(ptr %ptr) {
+; LA32-LABEL: load_unordered_i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.b $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_unordered_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.b $a0, $a0, 0
+; LA64-NEXT: ret
+ %val = load atomic i8, ptr %ptr unordered, align 1
+ ret i8 %val
+}
+
+define i16 @load_unordered_i16(ptr %ptr) {
+; LA32-LABEL: load_unordered_i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.h $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_unordered_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.h $a0, $a0, 0
+; LA64-NEXT: ret
+ %val = load atomic i16, ptr %ptr unordered, align 2
+ ret i16 %val
+}
+
+define i32 @load_unordered_i32(ptr %ptr) {
+; LA32-LABEL: load_unordered_i32:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_unordered_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.w $a0, $a0, 0
+; LA64-NEXT: ret
+ %val = load atomic i32, ptr %ptr unordered, align 4
+ ret i32 %val
+}
+
+define i64 @load_unordered_i64(ptr %ptr) {
+; LA32-LABEL: load_unordered_i64:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: move $a1, $zero
+; LA32-NEXT: bl %plt(__atomic_load_8)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_unordered_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: ret
+ %val = load atomic i64, ptr %ptr unordered, align 8
+ ret i64 %val
+}
+
+define i8 @load_monotonic_i8(ptr %ptr) {
+; LA32-LABEL: load_monotonic_i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.b $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_monotonic_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.b $a0, $a0, 0
+; LA64-NEXT: ret
+ %val = load atomic i8, ptr %ptr monotonic, align 1
+ ret i8 %val
+}
+
+define i16 @load_monotonic_i16(ptr %ptr) {
+; LA32-LABEL: load_monotonic_i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.h $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_monotonic_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.h $a0, $a0, 0
+; LA64-NEXT: ret
+ %val = load atomic i16, ptr %ptr monotonic, align 2
+ ret i16 %val
+}
+
+define i32 @load_monotonic_i32(ptr %ptr) {
+; LA32-LABEL: load_monotonic_i32:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_monotonic_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.w $a0, $a0, 0
+; LA64-NEXT: ret
+ %val = load atomic i32, ptr %ptr monotonic, align 4
+ ret i32 %val
+}
+
+define i64 @load_monotonic_i64(ptr %ptr) {
+; LA32-LABEL: load_monotonic_i64:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: move $a1, $zero
+; LA32-NEXT: bl %plt(__atomic_load_8)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_monotonic_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: ret
+ %val = load atomic i64, ptr %ptr monotonic, align 8
+ ret i64 %val
+}
+
+define i8 @load_seq_cst_i8(ptr %ptr) {
+; LA32-LABEL: load_seq_cst_i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.b $a0, $a0, 0
+; LA32-NEXT: dbar 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_seq_cst_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.b $a0, $a0, 0
+; LA64-NEXT: dbar 16
+; LA64-NEXT: ret
+ %val = load atomic i8, ptr %ptr seq_cst, align 1
+ ret i8 %val
+}
+
+define i16 @load_seq_cst_i16(ptr %ptr) {
+; LA32-LABEL: load_seq_cst_i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.h $a0, $a0, 0
+; LA32-NEXT: dbar 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_seq_cst_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.h $a0, $a0, 0
+; LA64-NEXT: dbar 16
+; LA64-NEXT: ret
+ %val = load atomic i16, ptr %ptr seq_cst, align 2
+ ret i16 %val
+}
+
+define i32 @load_seq_cst_i32(ptr %ptr) {
+; LA32-LABEL: load_seq_cst_i32:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: dbar 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_seq_cst_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.w $a0, $a0, 0
+; LA64-NEXT: dbar 16
+; LA64-NEXT: ret
+ %val = load atomic i32, ptr %ptr seq_cst, align 4
+ ret i32 %val
+}
+
+define i64 @load_seq_cst_i64(ptr %ptr) {
+; LA32-LABEL: load_seq_cst_i64:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: ori $a1, $zero, 5
+; LA32-NEXT: bl %plt(__atomic_load_8)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_seq_cst_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: dbar 16
+; LA64-NEXT: ret
+ %val = load atomic i64, ptr %ptr seq_cst, align 8
+ ret i64 %val
+}
+
define void @store_release_i8(ptr %ptr, i8 signext %v) {
; LA32-LABEL: store_release_i8:
; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 18
; LA32-NEXT: st.b $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: store_release_i8:
; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 18
; LA64-NEXT: st.b $a1, $a0, 0
; LA64-NEXT: ret
store atomic i8 %v, ptr %ptr release, align 1
@@ -91,13 +287,13 @@ define void @store_release_i8(ptr %ptr, i8 signext %v) {
define void @store_release_i16(ptr %ptr, i16 signext %v) {
; LA32-LABEL: store_release_i16:
; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 18
; LA32-NEXT: st.h $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: store_release_i16:
; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 18
; LA64-NEXT: st.h $a1, $a0, 0
; LA64-NEXT: ret
store atomic i16 %v, ptr %ptr release, align 2
@@ -107,7 +303,7 @@ define void @store_release_i16(ptr %ptr, i16 signext %v) {
define void @store_release_i32(ptr %ptr, i32 signext %v) {
; LA32-LABEL: store_release_i32:
; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 18
; LA32-NEXT: st.w $a1, $a0, 0
; LA32-NEXT: ret
;
@@ -269,16 +465,16 @@ define void @store_monotonic_i64(ptr %ptr, i64 %v) {
define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) {
; LA32-LABEL: store_seq_cst_i8:
; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 16
; LA32-NEXT: st.b $a1, $a0, 0
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 16
; LA32-NEXT: ret
;
; LA64-LABEL: store_seq_cst_i8:
; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 16
; LA64-NEXT: st.b $a1, $a0, 0
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 16
; LA64-NEXT: ret
store atomic i8 %v, ptr %ptr seq_cst, align 1
ret void
@@ -287,16 +483,16 @@ define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) {
define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) {
; LA32-LABEL: store_seq_cst_i16:
; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 16
; LA32-NEXT: st.h $a1, $a0, 0
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 16
; LA32-NEXT: ret
;
; LA64-LABEL: store_seq_cst_i16:
; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 16
; LA64-NEXT: st.h $a1, $a0, 0
-; LA64-NEXT: dbar 0
+; LA64-NEXT: dbar 16
; LA64-NEXT: ret
store atomic i16 %v, ptr %ptr seq_cst, align 2
ret void
@@ -305,9 +501,9 @@ define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) {
define void @store_seq_cst_i32(ptr %ptr, i32 signext %v) {
; LA32-LABEL: store_seq_cst_i32:
; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 16
; LA32-NEXT: st.w $a1, $a0, 0
-; LA32-NEXT: dbar 0
+; LA32-NEXT: dbar 16
; LA32-NEXT: ret
;
; LA64-LABEL: store_seq_cst_i32:
diff --git a/llvm/unittests/Target/LoongArch/InstSizes.cpp b/llvm/unittests/Target/LoongArch/InstSizes.cpp
index 1a5d4369c48be568cf9f39fbc2e7f3aa17a8b0b7..3180c7237a7909d1248f2f6c3dc559f00aa12bea 100644
--- a/llvm/unittests/Target/LoongArch/InstSizes.cpp
+++ b/llvm/unittests/Target/LoongArch/InstSizes.cpp
@@ -121,7 +121,7 @@ TEST(InstSizes, AtomicPseudo) {
" dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoAtomicLoadAdd32 renamable $r7, renamable $r6, renamable $r8\n"
" dead early-clobber renamable $r5, dead early-clobber renamable $r9, dead early-clobber renamable $r10 = PseudoMaskedAtomicLoadUMax32 renamable $r7, renamable $r6, renamable $r8, 4\n"
" early-clobber renamable $r9, dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoMaskedAtomicLoadMax32 killed renamable $r6, killed renamable $r5, killed renamable $r7, killed renamable $r8, 4\n"
- " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6\n"
+ " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6, 4\n"
" dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoMaskedCmpXchg32 killed renamable $r7, killed renamable $r4, killed renamable $r6, killed renamable $r8, 4\n",
// clang-format on
[](LoongArchInstrInfo &II, MachineFunction &MF) {