diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 044d21c991e..65c8c75a22d 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -8690,15 +8690,18 @@ operation. The operation must be one of the following keywords: - min - umax - umin +- fadd +- fsub For most of these operations, the type of '<value>' must be an integer type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. For xchg, this may also be a floating point type with the same size constraints as -integers. The type of the '``<pointer>``' operand must be a pointer to -that type. If the ``atomicrmw`` is marked as ``volatile``, then the -optimizer is not allowed to modify the number or order of execution of -this ``atomicrmw`` with other :ref:`volatile operations <volatile>`. +integers. For fadd/fsub, this must be a floating point type. The +type of the '``<pointer>``' operand must be a pointer to that type. If +the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not +allowed to modify the number or order of execution of this +``atomicrmw`` with other :ref:`volatile operations <volatile>`. A ``atomicrmw`` instruction can also take an optional ":ref:`syncscope <syncscope>`" argument. @@ -8724,6 +8727,8 @@ operation argument: comparison) - umin: ``*ptr = *ptr < val ? *ptr : val`` (using an unsigned comparison) +- fadd: ``*ptr = *ptr + val`` (using floating point arithmetic) +- fsub: ``*ptr = *ptr - val`` (using floating point arithmetic) Example: """""""" diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index e694331776c..ce853cd3998 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -406,7 +406,9 @@ enum RMWOperations { RMW_MAX = 7, RMW_MIN = 8, RMW_UMAX = 9, - RMW_UMIN = 10 + RMW_UMIN = 10, + RMW_FADD = 11, + RMW_FSUB = 12 }; /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index 3886ac54ed3..850b9e2d555 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -1715,8 +1715,9 @@ public: /// Returns how the IR-level AtomicExpand pass should expand the given /// AtomicRMW, if at all. Default is to never expand. - virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const { - return AtomicExpansionKind::None; + virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { + return RMW->isFloatingPointOperation() ?
+ AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; } /// On some platforms, an AtomicRMW that never actually modifies the value diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index c720dd8745e..beadf7313a2 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -724,8 +724,14 @@ public: /// *p = old getType()->getPointerAddressSpace(); } + bool isFloatingPointOperation() const { + return isFPOperation(getOperation()); + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::AtomicRMW; diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 20d96061102..855c5d26500 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -6815,6 +6815,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { AtomicOrdering Ordering = AtomicOrdering::NotAtomic; SyncScope::ID SSID = SyncScope::System; bool isVolatile = false; + bool IsFP = false; AtomicRMWInst::BinOp Operation; if (EatIfPresent(lltok::kw_volatile)) @@ -6833,6 +6834,14 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { case lltok::kw_min: Operation = AtomicRMWInst::Min; break; case lltok::kw_umax: Operation = AtomicRMWInst::UMax; break; case lltok::kw_umin: Operation = AtomicRMWInst::UMin; break; + case lltok::kw_fadd: + Operation = AtomicRMWInst::FAdd; + IsFP = true; + break; + case lltok::kw_fsub: + Operation = AtomicRMWInst::FSub; + IsFP = true; + break; } Lex.Lex(); // Eat the operation. @@ -6849,18 +6858,25 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType()) return Error(ValLoc, "atomicrmw value and pointer type do not match"); - if (Operation != AtomicRMWInst::Xchg && !Val->getType()->isIntegerTy()) { - return Error(ValLoc, "atomicrmw " + - AtomicRMWInst::getOperationName(Operation) + - " operand must be an integer"); - } - - if (Operation == AtomicRMWInst::Xchg && - !Val->getType()->isIntegerTy() && - !Val->getType()->isFloatingPointTy()) { - return Error(ValLoc, "atomicrmw " + - AtomicRMWInst::getOperationName(Operation) + - " operand must be an integer or floating point type"); + if (Operation == AtomicRMWInst::Xchg) { + if (!Val->getType()->isIntegerTy() && + !Val->getType()->isFloatingPointTy()) { + return Error(ValLoc, "atomicrmw " + + AtomicRMWInst::getOperationName(Operation) + + " operand must be an integer or floating point type"); + } + } else if (IsFP) { + if (!Val->getType()->isFloatingPointTy()) { + return Error(ValLoc, "atomicrmw " + + AtomicRMWInst::getOperationName(Operation) + + " operand must be a floating point type"); + } + } else { + if (!Val->getType()->isIntegerTy()) { + return Error(ValLoc, "atomicrmw " + + AtomicRMWInst::getOperationName(Operation) + + " operand must be an integer"); + } } unsigned Size = Val->getType()->getPrimitiveSizeInBits(); diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 87bd8f625fa..0b93a61dc40 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1034,6 +1034,8 @@ static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) { case bitc::RMW_MIN: return AtomicRMWInst::Min; case bitc::RMW_UMAX: return AtomicRMWInst::UMax; case bitc::RMW_UMIN: return AtomicRMWInst::UMin; + case bitc::RMW_FADD: return AtomicRMWInst::FAdd; + case bitc::RMW_FSUB: return
AtomicRMWInst::FSub; } } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index d345ae0c416..f4a539e51f7 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -559,6 +559,8 @@ static unsigned getEncodedRMWOperation(AtomicRMWInst::BinOp Op) { case AtomicRMWInst::Min: return bitc::RMW_MIN; case AtomicRMWInst::UMax: return bitc::RMW_UMAX; case AtomicRMWInst::UMin: return bitc::RMW_UMIN; + case AtomicRMWInst::FAdd: return bitc::RMW_FADD; + case AtomicRMWInst::FSub: return bitc::RMW_FSUB; } } diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index bef9682fbd5..2d915945392 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -549,6 +549,10 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, case AtomicRMWInst::UMin: NewVal = Builder.CreateICmpULE(Loaded, Inc); return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::FAdd: + return Builder.CreateFAdd(Loaded, Inc, "new"); + case AtomicRMWInst::FSub: + return Builder.CreateFSub(Loaded, Inc, "new"); default: llvm_unreachable("Unknown atomic op"); } @@ -1547,6 +1551,8 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) { case AtomicRMWInst::Min: case AtomicRMWInst::UMax: case AtomicRMWInst::UMin: + case AtomicRMWInst::FAdd: + case AtomicRMWInst::FSub: // No atomic libcalls are available for max/min/umax/umin. return {}; } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 8b674465792..f48a970fd40 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -1407,6 +1407,10 @@ StringRef AtomicRMWInst::getOperationName(BinOp Op) { return "umax"; case AtomicRMWInst::UMin: return "umin"; + case AtomicRMWInst::FAdd: + return "fadd"; + case AtomicRMWInst::FSub: + return "fsub"; case AtomicRMWInst::BAD_BINOP: return "<invalid operation>"; } diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index fb2388c232b..1000e210533 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -3435,6 +3435,11 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) { AtomicRMWInst::getOperationName(Op) + " operand must have integer or floating point type!", &RMWI, ElTy); + } else if (AtomicRMWInst::isFPOperation(Op)) { + Assert(ElTy->isFloatingPointTy(), "atomicrmw " + + AtomicRMWInst::getOperationName(Op) + + " operand must have floating point type!", + &RMWI, ElTy); } else { Assert(ElTy->isIntegerTy(), "atomicrmw " + AtomicRMWInst::getOperationName(Op) + diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 2530b0af87d..df77e8e7d5b 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11600,6 +11600,9 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { // For the real atomic operations, we have ldxr/stxr up to 128 bits, TargetLowering::AtomicExpansionKind AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size > 128) return AtomicExpansionKind::None; // Nand not supported in LSE.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 5cf9a2fc2bf..469ceb9c213 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -14645,6 +14645,9 @@ ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { // and up to 64 bits on the non-M profiles TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 329d7aeb693..b3781fe9e71 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -3110,13 +3110,21 @@ Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { BasicBlock *BB = Builder.GetInsertBlock(); Module *M = BB->getParent()->getParent(); - Type *Ty = cast<PointerType>(Addr->getType())->getElementType(); + auto PT = cast<PointerType>(Addr->getType()); + Type *Ty = PT->getElementType(); unsigned SZ = Ty->getPrimitiveSizeInBits(); assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported"); Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked : Intrinsic::hexagon_L4_loadd_locked; + + PointerType *NewPtrTy + = Builder.getIntNTy(SZ)->getPointerTo(PT->getAddressSpace()); + Addr = Builder.CreateBitCast(Addr, NewPtrTy); + Value *Fn = Intrinsic::getDeclaration(M, IntID); - return Builder.CreateCall(Fn, Addr, "larx"); + Value *Call = Builder.CreateCall(Fn, Addr, "larx"); + + return Builder.CreateBitCast(Call, Ty); } /// Perform a store-conditional operation to Addr. Return the status of the @@ -3127,10 +3135,17 @@ Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Module *M = BB->getParent()->getParent(); Type *Ty = Val->getType(); unsigned SZ = Ty->getPrimitiveSizeInBits(); + + Type *CastTy = Builder.getIntNTy(SZ); assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported"); Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked : Intrinsic::hexagon_S4_stored_locked; Value *Fn = Intrinsic::getDeclaration(M, IntID); + + unsigned AS = Addr->getType()->getPointerAddressSpace(); + Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS)); + Val = Builder.CreateBitCast(Val, CastTy); + Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx"); Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), ""); Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext())); diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp index 95943f3dbaf..16591c4208c 100644 --- a/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1724,6 +1724,12 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder, TargetLowering::AtomicExpansionKind RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating + // point operations can't be used in an lr/sc sequence without breaking the + // forward-progress guarantee.
+ if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size == 8 || Size == 16) return AtomicExpansionKind::MaskedIntrinsic; diff --git a/test/Assembler/atomic.ll b/test/Assembler/atomic.ll index a8b527f2f86..04ff262074f 100644 --- a/test/Assembler/atomic.ll +++ b/test/Assembler/atomic.ll @@ -39,3 +39,13 @@ define void @f(i32* %x) { fence syncscope("device") seq_cst ret void } + +define void @fp_atomics(float* %x) { + ; CHECK: atomicrmw fadd float* %x, float 1.000000e+00 seq_cst + atomicrmw fadd float* %x, float 1.0 seq_cst + + ; CHECK: atomicrmw volatile fadd float* %x, float 1.000000e+00 seq_cst + atomicrmw volatile fadd float* %x, float 1.0 seq_cst + + ret void +} diff --git a/test/Assembler/invalid-atomicrmw-fadd-must-be-fp-type.ll b/test/Assembler/invalid-atomicrmw-fadd-must-be-fp-type.ll new file mode 100644 index 00000000000..3185d9505db --- /dev/null +++ b/test/Assembler/invalid-atomicrmw-fadd-must-be-fp-type.ll @@ -0,0 +1,7 @@ +; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s + +; CHECK: error: atomicrmw fadd operand must be a floating point type +define void @f(i32* %ptr) { + atomicrmw fadd i32* %ptr, i32 2 seq_cst + ret void +} diff --git a/test/Assembler/invalid-atomicrmw-fsub-must-be-fp-type.ll b/test/Assembler/invalid-atomicrmw-fsub-must-be-fp-type.ll new file mode 100644 index 00000000000..cd5bd4beff5 --- /dev/null +++ b/test/Assembler/invalid-atomicrmw-fsub-must-be-fp-type.ll @@ -0,0 +1,7 @@ +; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s + +; CHECK: error: atomicrmw fsub operand must be a floating point type +define void @f(i32* %ptr) { + atomicrmw fsub i32* %ptr, i32 2 seq_cst + ret void +} diff --git a/test/Bitcode/compatibility.ll b/test/Bitcode/compatibility.ll index 320ed831db3..3c5e86eee01 100644 --- a/test/Bitcode/compatibility.ll +++ b/test/Bitcode/compatibility.ll @@ -764,6 +764,13 @@ define void @atomics(i32* %word) { define void @fp_atomics(float* %word) { ; CHECK: %atomicrmw.xchg = atomicrmw xchg float* %word, float 1.000000e+00 monotonic %atomicrmw.xchg = atomicrmw xchg float* %word, float 1.0 monotonic + +; CHECK: %atomicrmw.fadd = atomicrmw fadd float* %word, float 1.000000e+00 monotonic + %atomicrmw.fadd = atomicrmw fadd float* %word, float 1.0 monotonic + +; CHECK: %atomicrmw.fsub = atomicrmw fsub float* %word, float 1.000000e+00 monotonic + %atomicrmw.fsub = atomicrmw fsub float* %word, float 1.0 monotonic + ret void } diff --git a/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll b/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll new file mode 100644 index 00000000000..d63f911a33f --- /dev/null +++ b/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=aarch64-linux-gnu -atomic-expand %s | FileCheck %s + +define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fadd_f32( +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] 
= cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fadd float* %ptr, float %value seq_cst + ret float %res +} + +define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fsub_f32( +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fsub float* %ptr, float %value seq_cst + ret float %res +} + diff --git a/test/Transforms/AtomicExpand/ARM/atomicrmw-fp.ll b/test/Transforms/AtomicExpand/ARM/atomicrmw-fp.ll new file mode 100644 index 00000000000..6f8ffc1cba2 --- /dev/null +++ b/test/Transforms/AtomicExpand/ARM/atomicrmw-fp.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s + +define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fadd_f32( +; CHECK-NEXT: call void @llvm.arm.dmb(i32 11) +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: call void @llvm.arm.dmb(i32 11) +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fadd float* %ptr, float %value seq_cst + ret float %res +} + +define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fsub_f32( +; CHECK-NEXT: call void @llvm.arm.dmb(i32 11) +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label 
[[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: call void @llvm.arm.dmb(i32 11) +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fsub float* %ptr, float %value seq_cst + ret float %res +} + diff --git a/test/Transforms/AtomicExpand/Hexagon/atomicrmw-fp.ll b/test/Transforms/AtomicExpand/Hexagon/atomicrmw-fp.ll new file mode 100644 index 00000000000..34026909d76 --- /dev/null +++ b/test/Transforms/AtomicExpand/Hexagon/atomicrmw-fp.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=hexagon-- -atomic-expand %s | FileCheck %s + +define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fadd_f32( +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTR:%.*]] to i32* +; CHECK-NEXT: [[LARX:%.*]] = call i32 @llvm.hexagon.L2.loadw.locked(i32* [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[LARX]] to float +; CHECK-NEXT: [[NEW:%.*]] = fadd float [[TMP2]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[STCX:%.*]] = call i32 @llvm.hexagon.S2.storew.locked(i32* [[TMP3]], i32 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[STCX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP2]] +; + %res = atomicrmw fadd float* %ptr, float %value seq_cst + ret float %res +} + +define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fsub_f32( +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTR:%.*]] to i32* +; CHECK-NEXT: [[LARX:%.*]] = call i32 @llvm.hexagon.L2.loadw.locked(i32* [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[LARX]] to float +; CHECK-NEXT: [[NEW:%.*]] = fsub float [[TMP2]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[STCX:%.*]] = call i32 @llvm.hexagon.S2.storew.locked(i32* [[TMP3]], i32 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[STCX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP2]] +; + %res = atomicrmw fsub float* %ptr, float %value seq_cst + ret float %res +} + diff 
--git a/test/Transforms/AtomicExpand/Hexagon/lit.local.cfg b/test/Transforms/AtomicExpand/Hexagon/lit.local.cfg new file mode 100644 index 00000000000..cc6a7edf05f --- /dev/null +++ b/test/Transforms/AtomicExpand/Hexagon/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'Hexagon' in config.root.targets: + config.unsupported = True diff --git a/test/Transforms/AtomicExpand/Mips/atomicrmw-fp.ll b/test/Transforms/AtomicExpand/Mips/atomicrmw-fp.ll new file mode 100644 index 00000000000..7931b2bb7f3 --- /dev/null +++ b/test/Transforms/AtomicExpand/Mips/atomicrmw-fp.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=mips64-mti-linux-gnu -atomic-expand %s | FileCheck %s + +define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fadd_f32( +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fadd float* %ptr, float %value seq_cst + ret float %res +} + +define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fsub_f32( +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fsub float* %ptr, float %value seq_cst + ret float %res +} + diff --git a/test/Transforms/AtomicExpand/Mips/lit.local.cfg b/test/Transforms/AtomicExpand/Mips/lit.local.cfg new file mode 100644 index 00000000000..7d12f7a9c56 --- /dev/null +++ b/test/Transforms/AtomicExpand/Mips/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'Mips' in config.root.targets: + config.unsupported = True diff --git a/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll 
b/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll new file mode 100644 index 00000000000..7ffe404c843 --- /dev/null +++ b/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=riscv32-- -atomic-expand %s | FileCheck %s + +define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fadd_f32( +; CHECK-NEXT: [[TMP1:%.*]] = alloca float, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTR]] to i8* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP1]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) +; CHECK-NEXT: store float [[LOADED]], float* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange_4(i8* [[TMP3]], i8* [[TMP4]], i32 [[TMP5]], i32 5, i32 5) +; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP4]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { float, i1 } undef, float [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { float, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { float, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { float, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[NEWLOADED]] +; + %res = atomicrmw fadd float* %ptr, float %value seq_cst + ret float %res +} + +define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fsub_f32( +; CHECK-NEXT: [[TMP1:%.*]] = alloca float, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTR]] to i8* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP1]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) +; CHECK-NEXT: store float [[LOADED]], float* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange_4(i8* [[TMP3]], i8* [[TMP4]], i32 [[TMP5]], i32 5, i32 5) +; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP4]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { float, i1 } undef, float [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { float, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { float, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { float, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[NEWLOADED]] +; + %res = atomicrmw fsub float* %ptr, float %value seq_cst + ret float %res +} 
+ diff --git a/test/Transforms/AtomicExpand/RISCV/lit.local.cfg b/test/Transforms/AtomicExpand/RISCV/lit.local.cfg new file mode 100644 index 00000000000..7aaeda5a5b3 --- /dev/null +++ b/test/Transforms/AtomicExpand/RISCV/lit.local.cfg @@ -0,0 +1,5 @@ +config.suffixes = ['.ll'] + +targets = set(config.root.targets_to_build.split()) +if not 'RISCV' in targets: + config.unsupported = True
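
For reference, a hand-written usage example of the new operations in LLVM IR. It is not taken from the patch; the function name, value names, orderings and sync scope are illustrative only, and simply follow the syntax and semantics described in the LangRef hunk above (the instruction yields the value stored at the pointer before the update).

define void @fp_rmw_examples(float* %p, double* %q) {
  ; atomically perform *%p = *%p + 1.0
  %old.add = atomicrmw fadd float* %p, float 1.0 seq_cst
  ; atomically perform *%q = *%q - 2.0, limited to a single-thread sync scope
  %old.sub = atomicrmw fsub double* %q, double 2.0 syncscope("singlethread") monotonic
  ; volatile form, as exercised by the new test/Assembler/atomic.ll case
  %old.vol = atomicrmw volatile fadd float* %p, float 1.0 seq_cst
  ret void
}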
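
With the new default in shouldExpandAtomicRMWInIR, targets that do not override the hook send fadd/fsub down the AtomicExpansionKind::CmpXChg path, so AtomicExpandPass rewrites them into a compare-exchange loop of the shape seen in the autogenerated CHECK lines above. Below is a hand-written sketch of that shape for a 64-bit double (the tests added here only cover float); block and value names are illustrative, and the bitcasts exist because cmpxchg only operates on integer or pointer values.

define double @fadd_f64_cmpxchg_sketch(double* %ptr, double %value) {
entry:
  %init = load double, double* %ptr, align 8
  br label %atomicrmw.start

atomicrmw.start:
  ; carry the most recently observed value around the loop
  %loaded = phi double [ %init, %entry ], [ %loaded.next, %atomicrmw.start ]
  %new = fadd double %loaded, %value
  %ptr.i64 = bitcast double* %ptr to i64*
  %new.i64 = bitcast double %new to i64
  %loaded.i64 = bitcast double %loaded to i64
  ; retry until no other thread has changed *%ptr between the load and the store
  %pair = cmpxchg i64* %ptr.i64, i64 %loaded.i64, i64 %new.i64 seq_cst seq_cst
  %success = extractvalue { i64, i1 } %pair, 1
  %observed = extractvalue { i64, i1 } %pair, 0
  %loaded.next = bitcast i64 %observed to double
  br i1 %success, label %atomicrmw.end, label %atomicrmw.start

atomicrmw.end:
  ret double %loaded.next
}

This mirrors the rationale in the RISCV comment above: the floating point arithmetic cannot sit inside an lr/sc sequence without risking the forward-progress guarantee, so the generic expansion falls back to cmpxchg.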