From 44582e29c8f7e774c11a623a09b97eb2eb879d90 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 22 Jan 2019 03:32:36 +0000
Subject: [PATCH] IR: Add fp operations to atomicrmw

Add just fadd/fsub for now.

llvm-svn: 351778
---
 docs/LangRef.rst                              | 13 +++--
 include/llvm/Bitcode/LLVMBitCodes.h           |  4 +-
 include/llvm/CodeGen/TargetLowering.h         |  5 +-
 include/llvm/IR/Instructions.h                | 22 +++++++-
 lib/AsmParser/LLParser.cpp                    | 40 ++++++++++-----
 lib/Bitcode/Reader/BitcodeReader.cpp          |  2 +
 lib/Bitcode/Writer/BitcodeWriter.cpp          |  2 +
 lib/CodeGen/AtomicExpandPass.cpp              |  6 +++
 lib/IR/Instructions.cpp                       |  4 ++
 lib/IR/Verifier.cpp                           |  5 ++
 lib/Target/AArch64/AArch64ISelLowering.cpp    |  3 ++
 lib/Target/ARM/ARMISelLowering.cpp            |  3 ++
 lib/Target/Hexagon/HexagonISelLowering.cpp    | 19 ++++++-
 lib/Target/RISCV/RISCVISelLowering.cpp        |  3 ++
 test/Assembler/atomic.ll                      | 10 ++++
 .../invalid-atomicrmw-fadd-must-be-fp-type.ll |  7 +++
 .../invalid-atomicrmw-fsub-must-be-fp-type.ll |  7 +++
 test/Bitcode/compatibility.ll                 |  7 +++
 .../AtomicExpand/AArch64/atomicrmw-fp.ll      | 47 +++++++++++++++++
 .../AtomicExpand/ARM/atomicrmw-fp.ll          | 51 +++++++++++++++++++
 .../AtomicExpand/Hexagon/atomicrmw-fp.ll      | 47 +++++++++++++++++
 .../AtomicExpand/Hexagon/lit.local.cfg        |  2 +
 .../AtomicExpand/Mips/atomicrmw-fp.ll         | 51 +++++++++++++++++++
 .../AtomicExpand/Mips/lit.local.cfg           |  2 +
 .../AtomicExpand/RISCV/atomicrmw-fp.ll        | 47 +++++++++++++++++
 .../AtomicExpand/RISCV/lit.local.cfg          |  5 ++
 26 files changed, 392 insertions(+), 22 deletions(-)
 create mode 100644 test/Assembler/invalid-atomicrmw-fadd-must-be-fp-type.ll
 create mode 100644 test/Assembler/invalid-atomicrmw-fsub-must-be-fp-type.ll
 create mode 100644 test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll
 create mode 100644 test/Transforms/AtomicExpand/ARM/atomicrmw-fp.ll
 create mode 100644 test/Transforms/AtomicExpand/Hexagon/atomicrmw-fp.ll
 create mode 100644 test/Transforms/AtomicExpand/Hexagon/lit.local.cfg
 create mode 100644 test/Transforms/AtomicExpand/Mips/atomicrmw-fp.ll
 create mode 100644 test/Transforms/AtomicExpand/Mips/lit.local.cfg
 create mode 100644 test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll
 create mode 100644 test/Transforms/AtomicExpand/RISCV/lit.local.cfg
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 9168648db6e..d9e9d73242d 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -8667,15 +8667,18 @@ operation. The operation must be one of the following keywords:
 -  min
 -  umax
 -  umin
+-  fadd
+-  fsub
 
 For most of these operations, the type of '<value>' must be an integer
 type whose bit width is a power of two greater than or equal to eight
 and less than or equal to a target-specific size limit. For xchg, this
 may also be a floating point type with the same size constraints as
-integers. The type of the '``<pointer>``' operand must be a pointer to
-that type. If the ``atomicrmw`` is marked as ``volatile``, then the
-optimizer is not allowed to modify the number or order of execution of
-this ``atomicrmw`` with other :ref:`volatile operations <volatile>`.
+integers.  For fadd/fsub, this must be a floating point type.  The
+type of the '``<pointer>``' operand must be a pointer to that type. If
+the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not
+allowed to modify the number or order of execution of this
+``atomicrmw`` with other :ref:`volatile operations <volatile>`.
 
 A ``atomicrmw`` instruction can also take an optional
 ":ref:`syncscope <syncscope>`" argument.
@@ -8701,6 +8704,8 @@ operation argument:
    comparison)
 -  umin: ``*ptr = *ptr < val ? *ptr : val`` (using an unsigned
    comparison)
+- fadd: ``*ptr = *ptr + val`` (using floating point arithmetic)
+- fsub: ``*ptr = *ptr - val`` (using floating point arithmetic)
 
 Example:
 """"""""
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index e694331776c..ce853cd3998 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -406,7 +406,9 @@ enum RMWOperations {
   RMW_MAX = 7,
   RMW_MIN = 8,
   RMW_UMAX = 9,
-  RMW_UMIN = 10
+  RMW_UMIN = 10,
+  RMW_FADD = 11,
+  RMW_FSUB = 12
 };
 
 /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing
diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
index 3886ac54ed3..850b9e2d555 100644
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -1715,8 +1715,9 @@ public:
 
   /// Returns how the IR-level AtomicExpand pass should expand the given
   /// AtomicRMW, if at all. Default is to never expand.
-  virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const {
-    return AtomicExpansionKind::None;
+  virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+    return RMW->isFloatingPointOperation() ?
+      AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
   }
 
   /// On some platforms, an AtomicRMW that never actually modifies the value
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index c720dd8745e..beadf7313a2 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -724,8 +724,14 @@ public:
     /// *p = old <unsigned v ? old : v
     UMin,
 
+    /// *p = old + v
+    FAdd,
+
+    /// *p = old - v
+    FSub,
+
     FIRST_BINOP = Xchg,
-    LAST_BINOP = UMin,
+    LAST_BINOP = FSub,
     BAD_BINOP
   };
 
@@ -747,6 +753,16 @@ public:
 
   static StringRef getOperationName(BinOp Op);
 
+  static bool isFPOperation(BinOp Op) {
+    switch (Op) {
+    case AtomicRMWInst::FAdd:
+    case AtomicRMWInst::FSub:
+      return true;
+    default:
+      return false;
+    }
+  }
+
   void setOperation(BinOp Operation) {
     unsigned short SubclassData = getSubclassDataFromInstruction();
     setInstructionSubclassData((SubclassData & 31) |
@@ -804,6 +820,10 @@ public:
     return getPointerOperand()->getType()->getPointerAddressSpace();
   }
 
+  bool isFloatingPointOperation() const {
+    return isFPOperation(getOperation());
+  }
+
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Instruction *I) {
     return I->getOpcode() == Instruction::AtomicRMW;
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 20d96061102..855c5d26500 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -6815,6 +6815,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
   SyncScope::ID SSID = SyncScope::System;
   bool isVolatile = false;
+  bool IsFP = false;
   AtomicRMWInst::BinOp Operation;
 
   if (EatIfPresent(lltok::kw_volatile))
@@ -6833,6 +6834,14 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
   case lltok::kw_min: Operation = AtomicRMWInst::Min; break;
   case lltok::kw_umax: Operation = AtomicRMWInst::UMax; break;
   case lltok::kw_umin: Operation = AtomicRMWInst::UMin; break;
+  case lltok::kw_fadd:
+    Operation = AtomicRMWInst::FAdd;
+    IsFP = true;
+    break;
+  case lltok::kw_fsub:
+    Operation = AtomicRMWInst::FSub;
+    IsFP = true;
+    break;
   }
   Lex.Lex();  // Eat the operation.
 
@@ -6849,18 +6858,25 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
   if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
     return Error(ValLoc, "atomicrmw value and pointer type do not match");
 
-  if (Operation != AtomicRMWInst::Xchg && !Val->getType()->isIntegerTy()) {
-    return Error(ValLoc, "atomicrmw " +
-                 AtomicRMWInst::getOperationName(Operation) +
-                 " operand must be an integer");
-  }
-
-  if (Operation == AtomicRMWInst::Xchg &&
-      !Val->getType()->isIntegerTy() &&
-      !Val->getType()->isFloatingPointTy()) {
-    return Error(ValLoc, "atomicrmw " +
-                 AtomicRMWInst::getOperationName(Operation) +
-                 " operand must be an integer or floating point type");
+  if (Operation == AtomicRMWInst::Xchg) {
+    if (!Val->getType()->isIntegerTy() &&
+        !Val->getType()->isFloatingPointTy()) {
+      return Error(ValLoc, "atomicrmw " +
+                   AtomicRMWInst::getOperationName(Operation) +
+                   " operand must be an integer or floating point type");
+    }
+  } else if (IsFP) {
+    if (!Val->getType()->isFloatingPointTy()) {
+      return Error(ValLoc, "atomicrmw " +
+                   AtomicRMWInst::getOperationName(Operation) +
+                   " operand must be a floating point type");
+    }
+  } else {
+    if (!Val->getType()->isIntegerTy()) {
+      return Error(ValLoc, "atomicrmw " +
+                   AtomicRMWInst::getOperationName(Operation) +
+                   " operand must be an integer");
+    }
   }
 
   unsigned Size = Val->getType()->getPrimitiveSizeInBits();
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 87bd8f625fa..0b93a61dc40 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1034,6 +1034,8 @@ static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) {
   case bitc::RMW_MIN: return AtomicRMWInst::Min;
   case bitc::RMW_UMAX: return AtomicRMWInst::UMax;
   case bitc::RMW_UMIN: return AtomicRMWInst::UMin;
+  case bitc::RMW_FADD: return AtomicRMWInst::FAdd;
+  case bitc::RMW_FSUB: return AtomicRMWInst::FSub;
   }
 }
 
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index d345ae0c416..f4a539e51f7 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -559,6 +559,8 @@ static unsigned getEncodedRMWOperation(AtomicRMWInst::BinOp Op) {
   case AtomicRMWInst::Min: return bitc::RMW_MIN;
   case AtomicRMWInst::UMax: return bitc::RMW_UMAX;
   case AtomicRMWInst::UMin: return bitc::RMW_UMIN;
+  case AtomicRMWInst::FAdd: return bitc::RMW_FADD;
+  case AtomicRMWInst::FSub: return bitc::RMW_FSUB;
   }
 }
 
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index bef9682fbd5..2d915945392 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -549,6 +549,10 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
   case AtomicRMWInst::UMin:
     NewVal = Builder.CreateICmpULE(Loaded, Inc);
     return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+  case AtomicRMWInst::FAdd:
+    return Builder.CreateFAdd(Loaded, Inc, "new");
+  case AtomicRMWInst::FSub:
+    return Builder.CreateFSub(Loaded, Inc, "new");
   default:
     llvm_unreachable("Unknown atomic op");
   }
@@ -1547,6 +1551,8 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
   case AtomicRMWInst::Min:
   case AtomicRMWInst::UMax:
   case AtomicRMWInst::UMin:
-    // No atomic libcalls are available for max/min/umax/umin.
+  case AtomicRMWInst::FAdd:
+  case AtomicRMWInst::FSub:
+    // No atomic libcalls are available for max/min/umax/umin/fadd/fsub.
     return {};
   }
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 8b674465792..f48a970fd40 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -1407,6 +1407,10 @@ StringRef AtomicRMWInst::getOperationName(BinOp Op) {
     return "umax";
   case AtomicRMWInst::UMin:
     return "umin";
+  case AtomicRMWInst::FAdd:
+    return "fadd";
+  case AtomicRMWInst::FSub:
+    return "fsub";
   case AtomicRMWInst::BAD_BINOP:
     return "<invalid operation>";
   }
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index fb2388c232b..1000e210533 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -3435,6 +3435,11 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
            AtomicRMWInst::getOperationName(Op) +
            " operand must have integer or floating point type!",
            &RMWI, ElTy);
+  } else if (AtomicRMWInst::isFPOperation(Op)) {
+    Assert(ElTy->isFloatingPointTy(), "atomicrmw " +
+           AtomicRMWInst::getOperationName(Op) +
+           " operand must have floating point type!",
+           &RMWI, ElTy);
   } else {
     Assert(ElTy->isIntegerTy(), "atomicrmw " +
            AtomicRMWInst::getOperationName(Op) +
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2530b0af87d..df77e8e7d5b 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11600,6 +11600,9 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
 TargetLowering::AtomicExpansionKind
 AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  if (AI->isFloatingPointOperation())
+    return AtomicExpansionKind::CmpXChg;
+
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   if (Size > 128) return AtomicExpansionKind::None;
   // Nand not supported in LSE.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 5cf9a2fc2bf..469ceb9c213 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -14645,6 +14645,9 @@ ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
 // and up to 64 bits on the non-M profiles
 TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  if (AI->isFloatingPointOperation())
+    return AtomicExpansionKind::CmpXChg;
+
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
   return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 329d7aeb693..b3781fe9e71 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3110,13 +3110,21 @@ Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
       AtomicOrdering Ord) const {
   BasicBlock *BB = Builder.GetInsertBlock();
   Module *M = BB->getParent()->getParent();
-  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+  auto PT = cast<PointerType>(Addr->getType());
+  Type *Ty = PT->getElementType();
   unsigned SZ = Ty->getPrimitiveSizeInBits();
   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
                                    : Intrinsic::hexagon_L4_loadd_locked;
+
+  PointerType *NewPtrTy
+    = Builder.getIntNTy(SZ)->getPointerTo(PT->getAddressSpace());
+  Addr = Builder.CreateBitCast(Addr, NewPtrTy);
+
   Value *Fn = Intrinsic::getDeclaration(M, IntID);
-  return Builder.CreateCall(Fn, Addr, "larx");
+  Value *Call = Builder.CreateCall(Fn, Addr, "larx");
+
+  return Builder.CreateBitCast(Call, Ty);
 }
 
 /// Perform a store-conditional operation to Addr. Return the status of the
@@ -3127,10 +3135,17 @@ Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
   Module *M = BB->getParent()->getParent();
   Type *Ty = Val->getType();
   unsigned SZ = Ty->getPrimitiveSizeInBits();
+
+  Type *CastTy = Builder.getIntNTy(SZ);
   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
                                    : Intrinsic::hexagon_S4_stored_locked;
   Value *Fn = Intrinsic::getDeclaration(M, IntID);
+
+  unsigned AS = Addr->getType()->getPointerAddressSpace();
+  Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS));
+  Val = Builder.CreateBitCast(Val, CastTy);
+
   Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
   Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
   Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp
index 8a32d957ec5..f70ffaab0c1 100644
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1729,5 +1729,8 @@
 TargetLowering::AtomicExpansionKind
 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  if (AI->isFloatingPointOperation())
+    return AtomicExpansionKind::CmpXChg;
+
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   if (Size == 8 || Size == 16)
     return AtomicExpansionKind::MaskedIntrinsic;
diff --git a/test/Assembler/atomic.ll b/test/Assembler/atomic.ll
index a8b527f2f86..04ff262074f 100644
--- a/test/Assembler/atomic.ll
+++ b/test/Assembler/atomic.ll
@@ -39,3 +39,13 @@ define void @f(i32* %x) {
   fence syncscope("device") seq_cst
   ret void
 }
+
+define void @fp_atomics(float* %x) {
+ ; CHECK: atomicrmw fadd float* %x, float 1.000000e+00 seq_cst
+  atomicrmw fadd float* %x, float 1.0 seq_cst
+
+  ; CHECK: atomicrmw volatile fadd float* %x, float 1.000000e+00 seq_cst
+  atomicrmw volatile fadd float* %x, float 1.0 seq_cst
+
+  ret void
+}
diff --git a/test/Assembler/invalid-atomicrmw-fadd-must-be-fp-type.ll b/test/Assembler/invalid-atomicrmw-fadd-must-be-fp-type.ll
new file mode 100644
index 00000000000..3185d9505db
--- /dev/null
+++ b/test/Assembler/invalid-atomicrmw-fadd-must-be-fp-type.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: error: atomicrmw fadd operand must be a floating point type
+define void @f(i32* %ptr) {
+  atomicrmw fadd i32* %ptr, i32 2 seq_cst
+  ret void
+}
diff --git a/test/Assembler/invalid-atomicrmw-fsub-must-be-fp-type.ll b/test/Assembler/invalid-atomicrmw-fsub-must-be-fp-type.ll
new file mode 100644
index 00000000000..cd5bd4beff5
--- /dev/null
+++ b/test/Assembler/invalid-atomicrmw-fsub-must-be-fp-type.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: error: atomicrmw fsub operand must be a floating point type
+define void @f(i32* %ptr) {
+  atomicrmw fsub i32* %ptr, i32 2 seq_cst
+  ret void
+}
diff --git a/test/Bitcode/compatibility.ll b/test/Bitcode/compatibility.ll
index 320ed831db3..3c5e86eee01 100644
--- a/test/Bitcode/compatibility.ll
+++ b/test/Bitcode/compatibility.ll
@@ -764,6 +764,13 @@ define void @atomics(i32* %word) {
 define void @fp_atomics(float* %word) {
 ; CHECK: %atomicrmw.xchg = atomicrmw xchg float* %word, float 1.000000e+00 monotonic
   %atomicrmw.xchg = atomicrmw xchg float* %word, float 1.0 monotonic
+
+; CHECK: %atomicrmw.fadd = atomicrmw fadd float* %word, float 1.000000e+00 monotonic
+  %atomicrmw.fadd = atomicrmw fadd float* %word, float 1.0 monotonic
+
+; CHECK: %atomicrmw.fsub = atomicrmw fsub float* %word, float 1.000000e+00 monotonic
+  %atomicrmw.fsub = atomicrmw fsub float* %word, float 1.0 monotonic
+
   ret void
 }
 
diff --git a/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll b/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll
new file mode 100644
index 00000000000..d63f911a33f
--- /dev/null
+++ b/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=aarch64-linux-gnu -atomic-expand %s | FileCheck %s
+
+define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) {
+; CHECK-LABEL: @test_atomicrmw_fadd_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[NEW]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
+; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
+; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    ret float [[TMP6]]
+;
+  %res = atomicrmw fadd float* %ptr, float %value seq_cst
+  ret float %res
+}
+
+define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) {
+; CHECK-LABEL: @test_atomicrmw_fsub_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT:    [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[NEW]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
+; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
+; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    ret float [[TMP6]]
+;
+  %res = atomicrmw fsub float* %ptr, float %value seq_cst
+  ret float %res
+}
+
diff --git a/test/Transforms/AtomicExpand/ARM/atomicrmw-fp.ll b/test/Transforms/AtomicExpand/ARM/atomicrmw-fp.ll
new file mode 100644
index 00000000000..6f8ffc1cba2
--- /dev/null
+++ b/test/Transforms/AtomicExpand/ARM/atomicrmw-fp.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s
+
+define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) {
+; CHECK-LABEL: @test_atomicrmw_fadd_f32(
+; CHECK-NEXT:    call void @llvm.arm.dmb(i32 11)
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[NEW]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic
+; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
+; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    call void @llvm.arm.dmb(i32 11)
+; CHECK-NEXT:    ret float [[TMP6]]
+;
+  %res = atomicrmw fadd float* %ptr, float %value seq_cst
+  ret float %res
+}
+
+define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) {
+; CHECK-LABEL: @test_atomicrmw_fsub_f32(
+; CHECK-NEXT:    call void @llvm.arm.dmb(i32 11)
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT:    [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[NEW]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic
+; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
+; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    call void @llvm.arm.dmb(i32 11)
+; CHECK-NEXT:    ret float [[TMP6]]
+;
+  %res = atomicrmw fsub float* %ptr, float %value seq_cst
+  ret float %res
+}
+
diff --git a/test/Transforms/AtomicExpand/Hexagon/atomicrmw-fp.ll b/test/Transforms/AtomicExpand/Hexagon/atomicrmw-fp.ll
new file mode 100644
index 00000000000..34026909d76
--- /dev/null
+++ b/test/Transforms/AtomicExpand/Hexagon/atomicrmw-fp.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=hexagon-- -atomic-expand %s | FileCheck %s
+
+define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) {
+; CHECK-LABEL: @test_atomicrmw_fadd_f32(
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[PTR:%.*]] to i32*
+; CHECK-NEXT:    [[LARX:%.*]] = call i32 @llvm.hexagon.L2.loadw.locked(i32* [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[LARX]] to float
+; CHECK-NEXT:    [[NEW:%.*]] = fadd float [[TMP2]], [[VALUE:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[PTR]] to i32*
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[NEW]] to i32
+; CHECK-NEXT:    [[STCX:%.*]] = call i32 @llvm.hexagon.S2.storew.locked(i32* [[TMP3]], i32 [[TMP4]])
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[STCX]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
+; CHECK-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    ret float [[TMP2]]
+;
+  %res = atomicrmw fadd float* %ptr, float %value seq_cst
+  ret float %res
+}
+
+define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) {
+; CHECK-LABEL: @test_atomicrmw_fsub_f32(
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[PTR:%.*]] to i32*
+; CHECK-NEXT:    [[LARX:%.*]] = call i32 @llvm.hexagon.L2.loadw.locked(i32* [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[LARX]] to float
+; CHECK-NEXT:    [[NEW:%.*]] = fsub float [[TMP2]], [[VALUE:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[PTR]] to i32*
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[NEW]] to i32
+; CHECK-NEXT:    [[STCX:%.*]] = call i32 @llvm.hexagon.S2.storew.locked(i32* [[TMP3]], i32 [[TMP4]])
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[STCX]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
+; CHECK-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    ret float [[TMP2]]
+;
+  %res = atomicrmw fsub float* %ptr, float %value seq_cst
+  ret float %res
+}
+
diff --git a/test/Transforms/AtomicExpand/Hexagon/lit.local.cfg b/test/Transforms/AtomicExpand/Hexagon/lit.local.cfg
new file mode 100644
index 00000000000..cc6a7edf05f
--- /dev/null
+++ b/test/Transforms/AtomicExpand/Hexagon/lit.local.cfg
@@ -0,0 +1,2 @@
+if 'Hexagon' not in config.root.targets:
+    config.unsupported = True
diff --git a/test/Transforms/AtomicExpand/Mips/atomicrmw-fp.ll b/test/Transforms/AtomicExpand/Mips/atomicrmw-fp.ll
new file mode 100644
index 00000000000..7931b2bb7f3
--- /dev/null
+++ b/test/Transforms/AtomicExpand/Mips/atomicrmw-fp.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=mips64-mti-linux-gnu -atomic-expand %s | FileCheck %s
+
+define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) {
+; CHECK-LABEL: @test_atomicrmw_fadd_f32(
+; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[NEW]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic
+; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
+; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    ret float [[TMP6]]
+;
+  %res = atomicrmw fadd float* %ptr, float %value seq_cst
+  ret float %res
+}
+
+define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) {
+; CHECK-LABEL: @test_atomicrmw_fsub_f32(
+; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT:    [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[NEW]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic
+; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
+; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    ret float [[TMP6]]
+;
+  %res = atomicrmw fsub float* %ptr, float %value seq_cst
+  ret float %res
+}
+
diff --git a/test/Transforms/AtomicExpand/Mips/lit.local.cfg b/test/Transforms/AtomicExpand/Mips/lit.local.cfg
new file mode 100644
index 00000000000..7d12f7a9c56
--- /dev/null
+++ b/test/Transforms/AtomicExpand/Mips/lit.local.cfg
@@ -0,0 +1,2 @@
+if 'Mips' not in config.root.targets:
+    config.unsupported = True
diff --git a/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll b/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll
new file mode 100644
index 00000000000..0d2d8ca4f67
--- /dev/null
+++ b/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=riscv32-- -atomic-expand %s | FileCheck %s
+
+define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) {
+; CHECK-LABEL: @test_atomicrmw_fadd_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[NEW]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
+; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
+; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    ret float [[TMP6]]
+;
+  %res = atomicrmw fadd float* %ptr, float %value seq_cst
+  ret float %res
+}
+
+define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) {
+; CHECK-LABEL: @test_atomicrmw_fsub_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT:    [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[NEW]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
+; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
+; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    ret float [[TMP6]]
+;
+  %res = atomicrmw fsub float* %ptr, float %value seq_cst
+  ret float %res
+}
+
diff --git a/test/Transforms/AtomicExpand/RISCV/lit.local.cfg b/test/Transforms/AtomicExpand/RISCV/lit.local.cfg
new file mode 100644
index 00000000000..7aaeda5a5b3
--- /dev/null
+++ b/test/Transforms/AtomicExpand/RISCV/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll']
+
+built_targets = set(config.root.targets_to_build.split())
+if 'RISCV' not in built_targets:
+    config.unsupported = True