1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

X86: expand atomics in IR instead of as MachineInstrs.

The logic for expanding atomics that aren't natively supported in
terms of cmpxchg loops is much simpler to express at the IR level. It
also allows the normal optimisations and CodeGen improvements to help
out with atomics, instead of using a limited set of possible
instructions.

rdar://problem/13496295

llvm-svn: 212119
This commit is contained in:
Tim Northover 2014-07-01 18:53:31 +00:00
parent 31dbd2c8cd
commit 60e9ada729
21 changed files with 886 additions and 1203 deletions

View File

@ -14,6 +14,7 @@ add_public_tablegen_target(X86CommonTableGen)
set(sources
X86AsmPrinter.cpp
X86AtomicExpandPass.cpp
X86CodeEmitter.cpp
X86FastISel.cpp
X86FloatingPoint.cpp

View File

@ -24,6 +24,10 @@ class ImmutablePass;
class JITCodeEmitter;
class X86TargetMachine;
/// createX86AtomicExpandPass - This pass expands atomic operations that cannot
/// be handled natively in terms of a loop using cmpxchg.
FunctionPass *createX86AtomicExpandPass(const X86TargetMachine *TM);
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///

View File

@ -0,0 +1,281 @@
//===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions which
// cannot be implemented as a single instruction with cmpxchg-based loops.
//
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "x86-atomic-expand"
namespace {
  /// IR-level pass that rewrites atomic operations the X86 backend cannot
  /// select directly (e.g. atomicrmw max, or double-native-width operations)
  /// into cmpxchg-based loops before instruction selection runs.
  class X86AtomicExpandPass : public FunctionPass {
    const X86TargetMachine *TM; // Used only to query the X86Subtarget.
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit X86AtomicExpandPass(const X86TargetMachine *TM)
      : FunctionPass(ID), TM(TM) {}

    /// Walk \p F, expanding every atomic instruction shouldExpand() accepts.
    /// Returns true if the function was modified.
    bool runOnFunction(Function &F) override;

    // NOTE(review): declared but not defined anywhere in this file — possibly
    // dead; confirm before relying on it.
    bool expandAtomicInsts(Function &F);

    /// Returns true if operations on \p MemType must be implemented via a
    /// cmpxchg8b/cmpxchg16b loop (type is double the native width).
    bool needsCmpXchgNb(Type *MemType);

    /// There are four kinds of atomic operations. Two never need expanding:
    /// cmpxchg is what we expand the others *to*, and loads are easily handled
    /// by ISelLowering. Atomicrmw and store can need expanding in some
    /// circumstances.
    bool shouldExpand(Instruction *Inst);

    /// 128-bit atomic stores (64-bit on i686) need to be implemented in terms
    /// of trivial cmpxchg16b loops. A simple store isn't necessarily atomic.
    bool shouldExpandStore(StoreInst *SI);

    /// Only some atomicrmw instructions need expanding -- some operations
    /// (e.g. max) have absolutely no architectural support; some (e.g. or) have
    /// limited support but can't return the previous value; some (e.g. add)
    /// have complete support in the instruction set.
    ///
    /// Also, naturally, 128-bit operations always need to be expanded.
    bool shouldExpandAtomicRMW(AtomicRMWInst *AI);

    /// Rewrite \p AI into a load + cmpxchg loop. Always returns true.
    bool expandAtomicRMW(AtomicRMWInst *AI);
    /// Rewrite \p SI as "atomicrmw xchg", then expand that if necessary.
    bool expandAtomicStore(StoreInst *SI);
  };
}
char X86AtomicExpandPass::ID = 0;

/// Factory the X86 target uses to add this pass to its codegen pipeline.
FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) {
  return new X86AtomicExpandPass(TM);
}
/// Collect the atomic instructions of F up front, then expand each one that
/// needs it. Returns true if any instruction was rewritten.
bool X86AtomicExpandPass::runOnFunction(Function &F) {
  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather a
  // list of all atomic instructions before we start.
  for (BasicBlock &BB : F)
    for (Instruction &Inst : BB) {
      if (isa<AtomicRMWInst>(&Inst) ||
          (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
        AtomicInsts.push_back(&Inst);
    }

  bool MadeChange = false;
  for (Instruction *Inst : AtomicInsts) {
    if (!shouldExpand(Inst))
      continue;

    // This must be an if/else-if chain: expandAtomicRMW erases Inst, so the
    // previous code's second independent dyn_cast<StoreInst>(Inst) read freed
    // memory after an atomicrmw had been expanded.
    if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
      MadeChange |= expandAtomicRMW(AI);
    else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
      MadeChange |= expandAtomicStore(SI);
  }
  return MadeChange;
}
/// Returns true if operations on the given type will need to use either
/// cmpxchg8b or cmpxchg16b. This occurs if the type is 1 step up from the
/// native width, and the instructions are available (otherwise we leave them
/// alone to become __sync_fetch_and_... calls).
bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) {
  const X86Subtarget &STI = TM->getSubtarget<X86Subtarget>();
  if (!STI.hasCmpxchg16b())
    return false;

  // Double the native width: 128 bits on x86-64 (cmpxchg16b), 64 on i686.
  unsigned DoubleWidth = STI.is64Bit() ? 128 : 64;
  return MemType->getPrimitiveSizeInBits() == DoubleWidth;
}
bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) {
  const X86Subtarget &STI = TM->getSubtarget<X86Subtarget>();
  unsigned NativeWidth = STI.is64Bit() ? 64 : 32;

  // Double-native-width operations always go through a cmpxchg8b/16b loop.
  if (needsCmpXchgNb(AI->getType()))
    return true;

  // Anything wider than the native width that cmpxchgNb can't handle is left
  // alone (it will end up as a __sync_fetch_and_... call).
  if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth)
    return false;

  switch (AI->getOperation()) {
  case AtomicRMWInst::Xchg:
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
    // It's better to use xadd, xsub or xchg for these in all cases.
    return false;
  case AtomicRMWInst::Or:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Xor:
    // If the atomicrmw's result isn't actually used, we can just add a "lock"
    // prefix to a normal instruction for these operations.
    return !AI->use_empty();
  case AtomicRMWInst::Nand:
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
    // These always require a non-trivial set of data operations on x86. We must
    // use a cmpxchg loop.
    return true;
  default:
    llvm_unreachable("Unknown atomic operation");
  }
}
bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) {
  // A store only needs expansion when it is wider than a native operation,
  // i.e. when it must become a cmpxchg8b/16b loop to stay atomic.
  return needsCmpXchgNb(SI->getValueOperand()->getType());
}
bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) {
  // Dispatch on the concrete instruction kind; anything else (loads, cmpxchg)
  // never needs expansion here.
  if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
    return shouldExpandAtomicRMW(RMW);
  if (auto *Store = dyn_cast<StoreInst>(Inst))
    return shouldExpandStore(Store);
  return false;
}
/// Emit IR to implement the given atomicrmw operation on values in registers,
/// returning the new value.
static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
                              Value *Loaded, Value *Inc) {
  switch (Op) {
  case AtomicRMWInst::Xchg:
    // Exchange simply installs the incoming value.
    return Inc;
  case AtomicRMWInst::Add:
    return Builder.CreateAdd(Loaded, Inc, "new");
  case AtomicRMWInst::Sub:
    return Builder.CreateSub(Loaded, Inc, "new");
  case AtomicRMWInst::And:
    return Builder.CreateAnd(Loaded, Inc, "new");
  case AtomicRMWInst::Nand:
    return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
  case AtomicRMWInst::Or:
    return Builder.CreateOr(Loaded, Inc, "new");
  case AtomicRMWInst::Xor:
    return Builder.CreateXor(Loaded, Inc, "new");
  // min/max variants: compare, then select the winner.
  case AtomicRMWInst::Max: {
    Value *Cmp = Builder.CreateICmpSGT(Loaded, Inc);
    return Builder.CreateSelect(Cmp, Loaded, Inc, "new");
  }
  case AtomicRMWInst::Min: {
    Value *Cmp = Builder.CreateICmpSLE(Loaded, Inc);
    return Builder.CreateSelect(Cmp, Loaded, Inc, "new");
  }
  case AtomicRMWInst::UMax: {
    Value *Cmp = Builder.CreateICmpUGT(Loaded, Inc);
    return Builder.CreateSelect(Cmp, Loaded, Inc, "new");
  }
  case AtomicRMWInst::UMin: {
    Value *Cmp = Builder.CreateICmpULE(Loaded, Inc);
    return Builder.CreateSelect(Cmp, Loaded, Inc, "new");
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}
/// Rewrite an atomicrmw that has no direct instruction support into an
/// explicit load + cmpxchg retry loop. Always returns true (the IR changed).
bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
  // The emitted cmpxchg must be at least Monotonic.
  AtomicOrdering Order =
      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
  Value *Addr = AI->getPointerOperand();
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from AI.
  IRBuilder<> Builder(AI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateLoad(Addr);
  // setAlignment expects BYTES: request the type's natural alignment (an
  // atomic operand is at least naturally aligned). The previous code passed
  // the size in bits, over-claiming the load's alignment by a factor of 8.
  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal =
      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());

  // The failure ordering is the strongest one legal for the success ordering
  // (e.g. no Release on failure).
  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
  Loaded->addIncoming(NewLoaded, LoopBB);

  Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  AI->replaceAllUsesWith(NewLoaded);
  AI->eraseFromParent();

  return true;
}
/// Rewrite an atomic store that needs cmpxchgNb into "atomicrmw xchg" and
/// then expand that as usual. Always returns true (the IR changed).
bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) {
  // An atomic store might need cmpxchg16b (or 8b on x86) to execute. Express
  // this in terms of the usual expansion to "atomicrmw xchg".
  IRBuilder<> Builder(SI);
  // atomicrmw may not be Unordered (stores may be); promote to Monotonic,
  // matching what expandAtomicRMW does for the cmpxchg it emits.
  AtomicOrdering Order =
      SI->getOrdering() == Unordered ? Monotonic : SI->getOrdering();
  AtomicRMWInst *AI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
                              SI->getValueOperand(), Order);
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  if (shouldExpandAtomicRMW(AI))
    return expandAtomicRMW(AI);

  // Replacing the store with an xchg already changed the function. (The old
  // "return AI;" relied on an implicit pointer-to-bool conversion.)
  return true;
}

View File

@ -2126,38 +2126,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return getGlobalBaseReg();
case X86ISD::ATOMOR64_DAG:
case X86ISD::ATOMXOR64_DAG:
case X86ISD::ATOMADD64_DAG:
case X86ISD::ATOMSUB64_DAG:
case X86ISD::ATOMNAND64_DAG:
case X86ISD::ATOMAND64_DAG:
case X86ISD::ATOMMAX64_DAG:
case X86ISD::ATOMMIN64_DAG:
case X86ISD::ATOMUMAX64_DAG:
case X86ISD::ATOMUMIN64_DAG:
case X86ISD::ATOMSWAP64_DAG: {
unsigned Opc;
switch (Opcode) {
default: llvm_unreachable("Impossible opcode");
case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break;
case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break;
case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
}
SDNode *RetVal = SelectAtomic64(Node, Opc);
if (RetVal)
return RetVal;
break;
}
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:

View File

@ -592,21 +592,6 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
if (!Subtarget->is64Bit()) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
}
if (Subtarget->hasCmpxchg16b()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
}
@ -16228,29 +16213,6 @@ static void ReplaceATOMIC_LOAD(SDNode *Node,
Results.push_back(Swap.getValue(2));
}
/// ReplaceATOMIC_BINARY_64 - Rewrite an i64 atomic binary node as the given
/// X86ISD::ATOM*64_DAG mem-intrinsic node: the i64 value operand is split
/// into lo/hi i32 halves, and the two i32 results are re-packed into an i64
/// with BUILD_PAIR.
static void
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
                        SelectionDAG &DAG, unsigned NewOp) {
  SDLoc dl(Node);
  assert (Node->getValueType(0) == MVT::i64 &&
          "Only know how to expand i64 atomics");

  SDValue Chain = Node->getOperand(0);
  SDValue In1 = Node->getOperand(1); // forwarded unchanged — presumably the
                                     // address operand; confirm node layout
  // Split the i64 value operand (operand 2) into lo (index 0) / hi (index 1).
  SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
                             Node->getOperand(2), DAG.getIntPtrConstant(0));
  SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
                             Node->getOperand(2), DAG.getIntPtrConstant(1));
  SDValue Ops[] = { Chain, In1, In2L, In2H };
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
  SDValue Result =
    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, MVT::i64,
                            cast<MemSDNode>(Node)->getMemOperand());
  // Results: value 0/1 are the lo/hi i32 halves; value 2 is the chain.
  SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF));
  Results.push_back(Result.getValue(2));
}
/// ReplaceNodeResults - Replace a node with an illegal result type
/// with a new node built out of custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
@ -16398,57 +16360,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(EFLAGS.getValue(1));
return;
}
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_SWAP: {
unsigned Opc;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode");
case ISD::ATOMIC_LOAD_ADD:
Opc = X86ISD::ATOMADD64_DAG;
break;
case ISD::ATOMIC_LOAD_AND:
Opc = X86ISD::ATOMAND64_DAG;
break;
case ISD::ATOMIC_LOAD_NAND:
Opc = X86ISD::ATOMNAND64_DAG;
break;
case ISD::ATOMIC_LOAD_OR:
Opc = X86ISD::ATOMOR64_DAG;
break;
case ISD::ATOMIC_LOAD_SUB:
Opc = X86ISD::ATOMSUB64_DAG;
break;
case ISD::ATOMIC_LOAD_XOR:
Opc = X86ISD::ATOMXOR64_DAG;
break;
case ISD::ATOMIC_LOAD_MAX:
Opc = X86ISD::ATOMMAX64_DAG;
break;
case ISD::ATOMIC_LOAD_MIN:
Opc = X86ISD::ATOMMIN64_DAG;
break;
case ISD::ATOMIC_LOAD_UMAX:
Opc = X86ISD::ATOMUMAX64_DAG;
break;
case ISD::ATOMIC_LOAD_UMIN:
Opc = X86ISD::ATOMUMIN64_DAG;
break;
case ISD::ATOMIC_SWAP:
Opc = X86ISD::ATOMSWAP64_DAG;
break;
}
ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
return;
}
case ISD::ATOMIC_LOAD: {
ReplaceATOMIC_LOAD(N, Results, DAG);
return;
@ -16556,12 +16467,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG";
case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
@ -16952,685 +16857,6 @@ static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB,
return sinkMBB;
}
// Get CMPXCHG opcode for the specified data type.
static unsigned getCmpXChgOpcode(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Invalid operand size!");
  case MVT::i8:
    return X86::LCMPXCHG8;
  case MVT::i16:
    return X86::LCMPXCHG16;
  case MVT::i32:
    return X86::LCMPXCHG32;
  case MVT::i64:
    return X86::LCMPXCHG64;
  }
}
// Get LOAD opcode for the specified data type.
static unsigned getLoadOpcode(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Invalid operand size!");
  case MVT::i8:
    return X86::MOV8rm;
  case MVT::i16:
    return X86::MOV16rm;
  case MVT::i32:
    return X86::MOV32rm;
  case MVT::i64:
    return X86::MOV64rm;
  }
}
// Get opcode of the non-atomic one from the specified atomic instruction.
// Maps an ATOMAND/ATOMOR/ATOMXOR pseudo (any width) to the plain rr ALU
// opcode used inside the emitted cmpxchg loop.
static unsigned getNonAtomicOpcode(unsigned Opc) {
  switch (Opc) {
  case X86::ATOMAND8:  return X86::AND8rr;
  case X86::ATOMAND16: return X86::AND16rr;
  case X86::ATOMAND32: return X86::AND32rr;
  case X86::ATOMAND64: return X86::AND64rr;
  case X86::ATOMOR8:   return X86::OR8rr;
  case X86::ATOMOR16:  return X86::OR16rr;
  case X86::ATOMOR32:  return X86::OR32rr;
  case X86::ATOMOR64:  return X86::OR64rr;
  case X86::ATOMXOR8:  return X86::XOR8rr;
  case X86::ATOMXOR16: return X86::XOR16rr;
  case X86::ATOMXOR32: return X86::XOR32rr;
  case X86::ATOMXOR64: return X86::XOR64rr;
  }
  llvm_unreachable("Unhandled atomic-load-op opcode!");
}
// Get opcode of the non-atomic one from the specified atomic instruction with
// extra opcode.
// For NAND: returns AND, with ExtraOpc set to the matching NOT.
// For min/max: returns the CMOV opcode, with ExtraOpc set to the CMP.
// Note the 8-bit min/max cases return the *32-bit* CMOV: i8 is promoted to
// i32 before the CMOV is emitted (see the i8 path in EmitAtomicLoadArith).
static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc,
                                               unsigned &ExtraOpc) {
  switch (Opc) {
  case X86::ATOMNAND8:  ExtraOpc = X86::NOT8r;   return X86::AND8rr;
  case X86::ATOMNAND16: ExtraOpc = X86::NOT16r;  return X86::AND16rr;
  case X86::ATOMNAND32: ExtraOpc = X86::NOT32r;  return X86::AND32rr;
  case X86::ATOMNAND64: ExtraOpc = X86::NOT64r;  return X86::AND64rr;
  case X86::ATOMMAX8:   ExtraOpc = X86::CMP8rr;  return X86::CMOVL32rr;
  case X86::ATOMMAX16:  ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr;
  case X86::ATOMMAX32:  ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr;
  case X86::ATOMMAX64:  ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr;
  case X86::ATOMMIN8:   ExtraOpc = X86::CMP8rr;  return X86::CMOVG32rr;
  case X86::ATOMMIN16:  ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr;
  case X86::ATOMMIN32:  ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr;
  case X86::ATOMMIN64:  ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr;
  case X86::ATOMUMAX8:  ExtraOpc = X86::CMP8rr;  return X86::CMOVB32rr;
  case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr;
  case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr;
  case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr;
  case X86::ATOMUMIN8:  ExtraOpc = X86::CMP8rr;  return X86::CMOVA32rr;
  case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr;
  case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr;
  case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr;
  }
  llvm_unreachable("Unhandled atomic-load-op opcode!");
}
// Get opcode of the non-atomic one from the specified atomic instruction for
// 64-bit data type on 32-bit target.
// Returns the opcode for the low 32-bit half; HiOpc receives the opcode for
// the high half (e.g. ADD pairs with ADC, SUB with SBB so the carry/borrow
// propagates). For min/max both halves get a SETcc used to build the select
// condition.
static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
  switch (Opc) {
  case X86::ATOMAND6432:  HiOpc = X86::AND32rr; return X86::AND32rr;
  case X86::ATOMOR6432:   HiOpc = X86::OR32rr;  return X86::OR32rr;
  case X86::ATOMXOR6432:  HiOpc = X86::XOR32rr; return X86::XOR32rr;
  case X86::ATOMADD6432:  HiOpc = X86::ADC32rr; return X86::ADD32rr;
  case X86::ATOMSUB6432:  HiOpc = X86::SBB32rr; return X86::SUB32rr;
  case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
  case X86::ATOMMAX6432:  HiOpc = X86::SETLr;   return X86::SETLr;
  case X86::ATOMMIN6432:  HiOpc = X86::SETGr;   return X86::SETGr;
  case X86::ATOMUMAX6432: HiOpc = X86::SETBr;   return X86::SETBr;
  case X86::ATOMUMIN6432: HiOpc = X86::SETAr;   return X86::SETAr;
  }
  llvm_unreachable("Unhandled atomic-load-op opcode!");
}
// Get opcode of the non-atomic one from the specified atomic instruction for
// 64-bit data type on 32-bit target with extra opcode.
static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc,
                                                   unsigned &HiOpc,
                                                   unsigned &ExtraOpc) {
  // Only NAND needs an extra opcode here: AND both halves, then NOT each.
  if (Opc == X86::ATOMNAND6432) {
    ExtraOpc = X86::NOT32r;
    HiOpc = X86::AND32rr;
    return X86::AND32rr;
  }
  llvm_unreachable("Unhandled atomic-load-op opcode!");
}
// Get pseudo CMOV opcode from the specified data type.
static unsigned getPseudoCMOVOpc(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unknown CMOV opcode!");
  case MVT::i8:
    return X86::CMOV_GR8;
  case MVT::i16:
    return X86::CMOV_GR16;
  case MVT::i32:
    return X86::CMOV_GR32;
  }
}
// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions.
// They will be translated into a spin-loop or compare-exchange loop from
//
//    ...
//    dst = atomic-fetch-op MI.addr, MI.val
//    ...
//
// to
//
//    ...
//    t1 = LOAD MI.addr
// loop:
//    t4 = phi(t1, t3 / loop)
//    t2 = OP MI.val, t4
//    EAX = t4
//    LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
//    t3 = EAX
//    JNE loop
// sink:
//    dst = t3
//    ...
MachineBasicBlock *
X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
                                       MachineBasicBlock *MBB) const {
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  // Insertion point for the new blocks: immediately after MBB.
  MachineFunction::iterator I = MBB;
  ++I;

  assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
         "Unexpected number of operands");

  assert(MI->hasOneMemOperand() &&
         "Expected atomic-load-op to have one memoperand");

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  unsigned DstReg, SrcReg;
  unsigned MemOpndSlot;

  unsigned CurOp = 0;

  // Operand layout: dst, <AddrNumOperands address operands>, src.
  DstReg = MI->getOperand(CurOp++).getReg();
  MemOpndSlot = CurOp;
  CurOp += X86::AddrNumOperands;
  SrcReg = MI->getOperand(CurOp++).getReg();

  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  MVT::SimpleValueType VT = *RC->vt_begin();
  unsigned t1 = MRI.createVirtualRegister(RC);
  unsigned t2 = MRI.createVirtualRegister(RC);
  unsigned t3 = MRI.createVirtualRegister(RC);
  unsigned t4 = MRI.createVirtualRegister(RC);
  // Accumulator (AL/AX/EAX/RAX) implicitly used and defined by LCMPXCHG.
  unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT);

  unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
  unsigned LOADOpc = getLoadOpcode(VT);

  // For the atomic load-arith operator, we generate
  //
  //  thisMBB:
  //    t1 = LOAD [MI.addr]
  //  mainMBB:
  //    t4 = phi(t1 / thisMBB, t3 / mainMBB)
  //    t1 = OP MI.val, EAX
  //    EAX = t4
  //    LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
  //    t3 = EAX
  //    JNE mainMBB
  //  sinkMBB:
  //    dst = t3
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // thisMBB:
  // Emit the initial (plain) load of the current memory value.
  MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1);
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
    if (NewMO.isReg())
      NewMO.setIsKill(false); // The address is reused below; don't kill it.
    MIB.addOperand(NewMO);
  }
  // Reuse the pseudo's memoperands, but flipped from store to load semantics.
  for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
    unsigned flags = (*MMOI)->getFlags();
    flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
    MachineMemOperand *MMO =
      MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
                               (*MMOI)->getSize(),
                               (*MMOI)->getBaseAlignment(),
                               (*MMOI)->getTBAAInfo(),
                               (*MMOI)->getRanges());
    MIB.addMemOperand(MMO);
  }

  thisMBB->addSuccessor(mainMBB);

  // mainMBB:
  MachineBasicBlock *origMainMBB = mainMBB;

  // Add a PHI.
  MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4)
                        .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);

  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unhandled atomic-load-op opcode!");
  case X86::ATOMAND8:
  case X86::ATOMAND16:
  case X86::ATOMAND32:
  case X86::ATOMAND64:
  case X86::ATOMOR8:
  case X86::ATOMOR16:
  case X86::ATOMOR32:
  case X86::ATOMOR64:
  case X86::ATOMXOR8:
  case X86::ATOMXOR16:
  case X86::ATOMXOR32:
  case X86::ATOMXOR64: {
    // Simple ALU ops: one non-atomic instruction computes the new value.
    unsigned ARITHOpc = getNonAtomicOpcode(Opc);
    BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg)
      .addReg(t4);
    break;
  }
  case X86::ATOMNAND8:
  case X86::ATOMNAND16:
  case X86::ATOMNAND32:
  case X86::ATOMNAND64: {
    // NAND = NOT(AND): needs a temporary for the AND result.
    unsigned Tmp = MRI.createVirtualRegister(RC);
    unsigned NOTOpc;
    unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
    BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg)
      .addReg(t4);
    BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp);
    break;
  }
  case X86::ATOMMAX8:
  case X86::ATOMMAX16:
  case X86::ATOMMAX32:
  case X86::ATOMMAX64:
  case X86::ATOMMIN8:
  case X86::ATOMMIN16:
  case X86::ATOMMIN32:
  case X86::ATOMMIN64:
  case X86::ATOMUMAX8:
  case X86::ATOMUMAX16:
  case X86::ATOMUMAX32:
  case X86::ATOMUMAX64:
  case X86::ATOMUMIN8:
  case X86::ATOMUMIN16:
  case X86::ATOMUMIN32:
  case X86::ATOMUMIN64: {
    // min/max: CMP to set flags, then CMOV (or a pseudo-select) picks the
    // winner.
    unsigned CMPOpc;
    unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc);

    BuildMI(mainMBB, DL, TII->get(CMPOpc))
      .addReg(SrcReg)
      .addReg(t4);

    if (Subtarget->hasCMov()) {
      if (VT != MVT::i8) {
        // Native support
        BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
          .addReg(SrcReg)
          .addReg(t4);
      } else {
        // Promote i8 to i32 to use CMOV32
        // (there is no 8-bit CMOV; widen both inputs via INSERT_SUBREG into
        // undef 32-bit registers, CMOV, then copy the low byte back out).
        const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
        const TargetRegisterClass *RC32 =
          TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit);
        unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
        unsigned AccReg32 = MRI.createVirtualRegister(RC32);
        unsigned Tmp = MRI.createVirtualRegister(RC32);
        unsigned Undef = MRI.createVirtualRegister(RC32);
        BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);

        BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), SrcReg32)
          .addReg(Undef)
          .addReg(SrcReg)
          .addImm(X86::sub_8bit);
        BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
          .addReg(Undef)
          .addReg(t4)
          .addImm(X86::sub_8bit);

        BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp)
          .addReg(SrcReg32)
          .addReg(AccReg32);
        BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t2)
          .addReg(Tmp, 0, X86::sub_8bit);
      }
    } else {
      // Use pseudo select and lower them.
      assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
             "Invalid atomic-load-op transformation!");
      unsigned SelOpc = getPseudoCMOVOpc(VT);
      X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
      assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
      MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2)
              .addReg(SrcReg).addReg(t4)
              .addImm(CC);
      mainMBB = EmitLoweredSelect(MIB, mainMBB);
      // Replace the original PHI node as mainMBB is changed after CMOV
      // lowering.
      BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4)
        .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
      Phi->eraseFromParent();
    }
    break;
  }
  }

  // Copy PhyReg back from virtual register.
  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg)
    .addReg(t4);

  // Emit the LCMPXCHG: compares PhyReg with memory, stores t2 on match.
  MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
    if (NewMO.isReg())
      NewMO.setIsKill(false);
    MIB.addOperand(NewMO);
  }
  MIB.addReg(t2);
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Copy PhyReg back to virtual register.
  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3)
    .addReg(PhyReg);

  // Loop back while the compare-exchange failed.
  BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);

  mainMBB->addSuccessor(origMainMBB);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(TargetOpcode::COPY), DstReg)
    .addReg(t3);

  MI->eraseFromParent();

  return sinkMBB;
}
// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic
// instructions. They will be translated into a spin-loop or compare-exchange
// loop from
//
// ...
// dst = atomic-fetch-op MI.addr, MI.val
// ...
//
// to
//
// ...
// t1L = LOAD [MI.addr + 0]
// t1H = LOAD [MI.addr + 4]
// loop:
// t4L = phi(t1L, t3L / loop)
// t4H = phi(t1H, t3H / loop)
// t2L = OP MI.val.lo, t4L
// t2H = OP MI.val.hi, t4H
// EAX = t4L
// EDX = t4H
// EBX = t2L
// ECX = t2H
// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
// t3L = EAX
// t3H = EDX
// JNE loop
// sink:
// dstL = t3L
// dstH = t3H
// ...
MachineBasicBlock *
X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
MachineBasicBlock *MBB) const {
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = MBB;
++I;
assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 &&
"Unexpected number of operands");
assert(MI->hasOneMemOperand() &&
"Expected atomic-load-op32 to have one memoperand");
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
unsigned DstLoReg, DstHiReg;
unsigned SrcLoReg, SrcHiReg;
unsigned MemOpndSlot;
unsigned CurOp = 0;
DstLoReg = MI->getOperand(CurOp++).getReg();
DstHiReg = MI->getOperand(CurOp++).getReg();
MemOpndSlot = CurOp;
CurOp += X86::AddrNumOperands;
SrcLoReg = MI->getOperand(CurOp++).getReg();
SrcHiReg = MI->getOperand(CurOp++).getReg();
const TargetRegisterClass *RC = &X86::GR32RegClass;
const TargetRegisterClass *RC8 = &X86::GR8RegClass;
unsigned t1L = MRI.createVirtualRegister(RC);
unsigned t1H = MRI.createVirtualRegister(RC);
unsigned t2L = MRI.createVirtualRegister(RC);
unsigned t2H = MRI.createVirtualRegister(RC);
unsigned t3L = MRI.createVirtualRegister(RC);
unsigned t3H = MRI.createVirtualRegister(RC);
unsigned t4L = MRI.createVirtualRegister(RC);
unsigned t4H = MRI.createVirtualRegister(RC);
unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
unsigned LOADOpc = X86::MOV32rm;
// For the atomic load-arith operator, we generate
//
// thisMBB:
// t1L = LOAD [MI.addr + 0]
// t1H = LOAD [MI.addr + 4]
// mainMBB:
// t4L = phi(t1L / thisMBB, t3L / mainMBB)
// t4H = phi(t1H / thisMBB, t3H / mainMBB)
// t2L = OP MI.val.lo, t4L
// t2H = OP MI.val.hi, t4H
// EBX = t2L
// ECX = t2H
// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
// t3L = EAX
// t3H = EDX
// JNE loop
// sinkMBB:
// dstL = t3L
// dstH = t3H
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
MachineInstrBuilder MIB;
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
// Lo
MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
if (NewMO.isReg())
NewMO.setIsKill(false);
MIB.addOperand(NewMO);
}
for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
unsigned flags = (*MMOI)->getFlags();
flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
MachineMemOperand *MMO =
MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
(*MMOI)->getSize(),
(*MMOI)->getBaseAlignment(),
(*MMOI)->getTBAAInfo(),
(*MMOI)->getRanges());
MIB.addMemOperand(MMO);
};
MachineInstr *LowMI = MIB;
// Hi
MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp) {
MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
} else {
MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
if (NewMO.isReg())
NewMO.setIsKill(false);
MIB.addOperand(NewMO);
}
}
MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end());
thisMBB->addSuccessor(mainMBB);
// mainMBB:
MachineBasicBlock *origMainMBB = mainMBB;
// Add PHIs.
MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L)
.addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H)
.addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
unsigned Opc = MI->getOpcode();
switch (Opc) {
default:
llvm_unreachable("Unhandled atomic-load-op6432 opcode!");
case X86::ATOMAND6432:
case X86::ATOMOR6432:
case X86::ATOMXOR6432:
case X86::ATOMADD6432:
case X86::ATOMSUB6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L)
.addReg(SrcLoReg);
BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H)
.addReg(SrcHiReg);
break;
}
case X86::ATOMNAND6432: {
unsigned HiOpc, NOTOpc;
unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
unsigned TmpL = MRI.createVirtualRegister(RC);
unsigned TmpH = MRI.createVirtualRegister(RC);
BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg)
.addReg(t4L);
BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg)
.addReg(t4H);
BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL);
BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH);
break;
}
case X86::ATOMMAX6432:
case X86::ATOMMIN6432:
case X86::ATOMUMAX6432:
case X86::ATOMUMIN6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
unsigned cL = MRI.createVirtualRegister(RC8);
unsigned cH = MRI.createVirtualRegister(RC8);
unsigned cL32 = MRI.createVirtualRegister(RC);
unsigned cH32 = MRI.createVirtualRegister(RC);
unsigned cc = MRI.createVirtualRegister(RC);
// cl := cmp src_lo, lo
BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
.addReg(SrcLoReg).addReg(t4L);
BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
// ch := cmp src_hi, hi
BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
.addReg(SrcHiReg).addReg(t4H);
BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
// cc := if (src_hi == hi) ? cl : ch;
if (Subtarget->hasCMov()) {
BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc)
.addReg(cH32).addReg(cL32);
} else {
MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc)
.addReg(cH32).addReg(cL32)
.addImm(X86::COND_E);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
}
BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
if (Subtarget->hasCMov()) {
BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L)
.addReg(SrcLoReg).addReg(t4L);
BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H)
.addReg(SrcHiReg).addReg(t4H);
} else {
MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L)
.addReg(SrcLoReg).addReg(t4L)
.addImm(X86::COND_NE);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
// As the lowered CMOV won't clobber EFLAGS, we could reuse it for the
// 2nd CMOV lowering.
mainMBB->addLiveIn(X86::EFLAGS);
MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H)
.addReg(SrcHiReg).addReg(t4H)
.addImm(X86::COND_NE);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
// Replace the original PHI node as mainMBB is changed after CMOV
// lowering.
BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L)
.addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H)
.addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
PhiL->eraseFromParent();
PhiH->eraseFromParent();
}
break;
}
case X86::ATOMSWAP6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg);
BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg);
break;
}
}
// Copy EDX:EAX back from HiReg:LoReg
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H);
// Copy ECX:EBX from t1H:t1L
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H);
MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
if (NewMO.isReg())
NewMO.setIsKill(false);
MIB.addOperand(NewMO);
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Copy EDX:EAX back to t3H:t3L
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX);
BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
mainMBB->addSuccessor(origMainMBB);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstLoReg)
.addReg(t3L);
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstHiReg)
.addReg(t3H);
MI->eraseFromParent();
return sinkMBB;
}
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
// or XMM0_V32I8 in AVX all of this code can be replaced with that
// in the .td file.
@ -18840,62 +18066,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::XBEGIN:
return EmitXBegin(MI, BB, BB->getParent()->getTarget().getInstrInfo());
// Atomic Lowering.
case X86::ATOMAND8:
case X86::ATOMAND16:
case X86::ATOMAND32:
case X86::ATOMAND64:
// Fall through
case X86::ATOMOR8:
case X86::ATOMOR16:
case X86::ATOMOR32:
case X86::ATOMOR64:
// Fall through
case X86::ATOMXOR16:
case X86::ATOMXOR8:
case X86::ATOMXOR32:
case X86::ATOMXOR64:
// Fall through
case X86::ATOMNAND8:
case X86::ATOMNAND16:
case X86::ATOMNAND32:
case X86::ATOMNAND64:
// Fall through
case X86::ATOMMAX8:
case X86::ATOMMAX16:
case X86::ATOMMAX32:
case X86::ATOMMAX64:
// Fall through
case X86::ATOMMIN8:
case X86::ATOMMIN16:
case X86::ATOMMIN32:
case X86::ATOMMIN64:
// Fall through
case X86::ATOMUMAX8:
case X86::ATOMUMAX16:
case X86::ATOMUMAX32:
case X86::ATOMUMAX64:
// Fall through
case X86::ATOMUMIN8:
case X86::ATOMUMIN16:
case X86::ATOMUMIN32:
case X86::ATOMUMIN64:
return EmitAtomicLoadArith(MI, BB);
// This group does 64-bit operations on a 32-bit host.
case X86::ATOMAND6432:
case X86::ATOMOR6432:
case X86::ATOMXOR6432:
case X86::ATOMNAND6432:
case X86::ATOMADD6432:
case X86::ATOMSUB6432:
case X86::ATOMMAX6432:
case X86::ATOMMIN6432:
case X86::ATOMUMAX6432:
case X86::ATOMUMIN6432:
case X86::ATOMSWAP6432:
return EmitAtomicLoadArith6432(MI, BB);
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);

View File

@ -405,23 +405,8 @@ namespace llvm {
// XTEST - Test if in transactional execution.
XTEST,
// ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
// ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
// Atomic 64-bit binary operations.
ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
ATOMSUB64_DAG,
ATOMOR64_DAG,
ATOMXOR64_DAG,
ATOMAND64_DAG,
ATOMNAND64_DAG,
ATOMMAX64_DAG,
ATOMMIN64_DAG,
ATOMUMAX64_DAG,
ATOMUMIN64_DAG,
ATOMSWAP64_DAG,
// LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
LCMPXCHG_DAG,
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
LCMPXCHG8_DAG,
LCMPXCHG16_DAG,

View File

@ -521,83 +521,6 @@ def CMOV_RFP80 : I<0, Pseudo,
} // UsesCustomInserter = 1, Uses = [EFLAGS]
//===----------------------------------------------------------------------===//
// Atomic Instruction Pseudo Instructions
//===----------------------------------------------------------------------===//
// Pseudo atomic instructions
multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
let Defs = [EFLAGS, AL] in
def NAME#8 : I<0, Pseudo, (outs GR8:$dst),
(ins i8mem:$ptr, GR8:$val),
!strconcat(mnemonic, "8 PSEUDO!"), []>;
let Defs = [EFLAGS, AX] in
def NAME#16 : I<0, Pseudo,(outs GR16:$dst),
(ins i16mem:$ptr, GR16:$val),
!strconcat(mnemonic, "16 PSEUDO!"), []>;
let Defs = [EFLAGS, EAX] in
def NAME#32 : I<0, Pseudo, (outs GR32:$dst),
(ins i32mem:$ptr, GR32:$val),
!strconcat(mnemonic, "32 PSEUDO!"), []>;
let Defs = [EFLAGS, RAX] in
def NAME#64 : I<0, Pseudo, (outs GR64:$dst),
(ins i64mem:$ptr, GR64:$val),
!strconcat(mnemonic, "64 PSEUDO!"), []>;
}
}
multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS<string name, string frag> {
def : Pat<(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val),
(!cast<Instruction>(name # "8") addr:$ptr, GR8:$val)>;
def : Pat<(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val),
(!cast<Instruction>(name # "16") addr:$ptr, GR16:$val)>;
def : Pat<(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val),
(!cast<Instruction>(name # "32") addr:$ptr, GR32:$val)>;
def : Pat<(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val),
(!cast<Instruction>(name # "64") addr:$ptr, GR64:$val)>;
}
// Atomic exchange, and, or, xor
defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">;
defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMOR">;
defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMXOR">;
defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMNAND">;
defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMAX">;
defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMIN">;
defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMAX">;
defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMIN">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMAND", "atomic_load_and">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMOR", "atomic_load_or">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMXOR", "atomic_load_xor">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMNAND", "atomic_load_nand">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMAX", "atomic_load_max">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMIN", "atomic_load_min">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX],
mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
!strconcat(mnemonic, "6432 PSEUDO!"), []>;
}
defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">;
defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMOR">;
defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMXOR">;
defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMNAND">;
defm ATOMADD : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMADD">;
defm ATOMSUB : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSUB">;
defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMAX">;
defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMIN">;
defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMAX">;
defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMIN">;
defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
//===----------------------------------------------------------------------===//
// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
//===----------------------------------------------------------------------===//

View File

@ -155,27 +155,6 @@ def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

View File

@ -111,6 +111,7 @@ public:
return *getX86TargetMachine().getSubtargetImpl();
}
void addIRPasses() override;
bool addInstSelector() override;
bool addILPOpts() override;
bool addPreRegAlloc() override;
@ -123,6 +124,12 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
return new X86PassConfig(this, PM);
}
void X86PassConfig::addIRPasses() {
addPass(createX86AtomicExpandPass(&getX86TargetMachine()));
TargetPassConfig::addIRPasses();
}
bool X86PassConfig::addInstSelector() {
// Install an instruction selector.
addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));

View File

@ -11,9 +11,9 @@ entry:
; CHECK: movl 4([[REG]]), %edx
; CHECK: LBB0_1:
; CHECK: movl %eax, %ebx
; CHECK: addl {{%[a-z]+}}, %ebx
; CHECK: addl $1, %ebx
; CHECK: movl %edx, %ecx
; CHECK: adcl {{%[a-z]+}}, %ecx
; CHECK: adcl $0, %ecx
; CHECK: lock
; CHECK-NEXT: cmpxchg8b ([[REG]])
; CHECK-NEXT: jne

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86-64 > %t.x86-64
; RUN: llc < %s -march=x86 > %t.x86
; RUN: llc < %s -march=x86 -mattr=cx16 > %t.x86
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mcpu=corei7 -march=x86 -verify-machineinstrs | FileCheck %s
; 64-bit load/store on x86-32
; FIXME: The generated code can be substantially improved.

View File

@ -1,6 +1,5 @@
; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX
; RUN: llc -march=x86 -mattr=-cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=NOCMOV
; RUN: llc -march=x86 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC
; RUN: llc -march=x86 -mattr=+cmov,cx16 -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX
; RUN: llc -march=x86 -mattr=cx16 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC
@sc64 = external global i64
@ -9,87 +8,39 @@ define void @atomic_maxmin_i6432() {
%1 = atomicrmw max i64* @sc64, i64 5 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
; LINUX: setl
; LINUX: cmpl
; LINUX: setl
; LINUX: seta
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock
; LINUX-NEXT: cmpxchg8b
; LINUX: jne [[LABEL]]
; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
; NOCMOV: cmpl
; NOCMOV: setl
; NOCMOV: cmpl
; NOCMOV: setl
; NOCMOV: jne
; NOCMOV: jne
; NOCMOV: lock
; NOCMOV-NEXT: cmpxchg8b
; NOCMOV: jne [[LABEL]]
%2 = atomicrmw min i64* @sc64, i64 6 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
; LINUX: setg
; LINUX: cmpl
; LINUX: setg
; LINUX: setb
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock
; LINUX-NEXT: cmpxchg8b
; LINUX: jne [[LABEL]]
; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
; NOCMOV: cmpl
; NOCMOV: setg
; NOCMOV: cmpl
; NOCMOV: setg
; NOCMOV: jne
; NOCMOV: jne
; NOCMOV: lock
; NOCMOV-NEXT: cmpxchg8b
; NOCMOV: jne [[LABEL]]
%3 = atomicrmw umax i64* @sc64, i64 7 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
; LINUX: setb
; LINUX: cmpl
; LINUX: setb
; LINUX: seta
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock
; LINUX-NEXT: cmpxchg8b
; LINUX: jne [[LABEL]]
; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
; NOCMOV: cmpl
; NOCMOV: setb
; NOCMOV: cmpl
; NOCMOV: setb
; NOCMOV: jne
; NOCMOV: jne
; NOCMOV: lock
; NOCMOV-NEXT: cmpxchg8b
; NOCMOV: jne [[LABEL]]
%4 = atomicrmw umin i64* @sc64, i64 8 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
; LINUX: seta
; LINUX: cmpl
; LINUX: seta
; LINUX: setb
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock
; LINUX-NEXT: cmpxchg8b
; LINUX: jne [[LABEL]]
; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
; NOCMOV: cmpl
; NOCMOV: seta
; NOCMOV: cmpl
; NOCMOV: seta
; NOCMOV: jne
; NOCMOV: jne
; NOCMOV: lock
; NOCMOV-NEXT: cmpxchg8b
; NOCMOV: jne [[LABEL]]
ret void
}
@ -98,8 +49,8 @@ define void @atomic_maxmin_i6432() {
define void @tf_bug(i8* %ptr) nounwind {
; PIC-LABEL: tf_bug:
; PIC: movl _id-L1$pb(
; PIC: movl (_id-L1$pb)+4(
; PIC-DAG: movl _id-L1$pb(
; PIC-DAG: movl (_id-L1$pb)+4(
%tmp1 = atomicrmw add i64* @id, i64 1 seq_cst
%tmp2 = add i64 %tmp1, 1
%tmp3 = bitcast i8* %ptr to i64*

View File

@ -0,0 +1,315 @@
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s
; Scratch global that the fetch_and_* tests store their result into, so the
; returned (pre-op) value is kept live and visibly written out.
@var = global i128 0
; A 128-bit cmpxchg must become a single LOCK CMPXCHG16B: the expected value
; is set up in RDX:RAX and the replacement value in RCX:RBX.
define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
; CHECK-LABEL: val_compare_and_swap:
; CHECK: movq %rsi, %rax
; CHECK: movq %rcx, %rbx
; CHECK: movq %r8, %rcx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
%pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
%val = extractvalue { i128, i1 } %pair, 0
ret i128 %val
}
; atomicrmw nand has no single x86 instruction; expect a CMPXCHG16B retry
; loop that ANDs and then NOTs both 64-bit halves of the value.
define void @fetch_and_nand(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_nand:
; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG: movq (%rdi), %rax
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK: movq %rdx, %rcx
; CHECK: andq [[INCHI]], %rcx
; CHECK: movq %rax, %rbx
; The low half of %bits arrives in %rsi, so it makes sense for it to stay there.
; CHECK: andq %rsi, %rbx
; CHECK: notq %rbx
; CHECK: notq %rcx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
; CHECK: jne [[LOOP]]
; CHECK: movq %rax, _var
; CHECK: movq %rdx, _var+8
%val = atomicrmw nand i128* %p, i128 %bits release
store i128 %val, i128* @var, align 16
ret void
}
; 128-bit atomicrmw or: a CMPXCHG16B retry loop that ORs each 64-bit half.
define void @fetch_and_or(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_or:
; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG: movq (%rdi), %rax
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK: movq %rax, %rbx
; The low half of %bits arrives in %rsi, so it makes sense for it to stay there.
; CHECK: orq %rsi, %rbx
; CHECK: movq %rdx, %rcx
; CHECK: orq [[INCHI]], %rcx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
; CHECK: jne [[LOOP]]
; CHECK: movq %rax, _var
; CHECK: movq %rdx, _var+8
%val = atomicrmw or i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
}
; 128-bit atomicrmw add: a CMPXCHG16B retry loop using add on the low half
; and add-with-carry (adcq) on the high half.
define void @fetch_and_add(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_add:
; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG: movq (%rdi), %rax
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK: movq %rax, %rbx
; The low half of %bits arrives in %rsi, so it makes sense for it to stay there.
; CHECK: addq %rsi, %rbx
; CHECK: movq %rdx, %rcx
; CHECK: adcq [[INCHI]], %rcx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
; CHECK: jne [[LOOP]]
; CHECK: movq %rax, _var
; CHECK: movq %rdx, _var+8
%val = atomicrmw add i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
}
; 128-bit atomicrmw sub: a CMPXCHG16B retry loop using sub on the low half
; and subtract-with-borrow (sbbq) on the high half.
define void @fetch_and_sub(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_sub:
; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG: movq (%rdi), %rax
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK: movq %rax, %rbx
; The low half of %bits arrives in %rsi, so it makes sense for it to stay there.
; CHECK: subq %rsi, %rbx
; CHECK: movq %rdx, %rcx
; CHECK: sbbq [[INCHI]], %rcx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
; CHECK: jne [[LOOP]]
; CHECK: movq %rax, _var
; CHECK: movq %rdx, _var+8
%val = atomicrmw sub i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
}
; 128-bit signed min: both halves are compared (setbe for the low word,
; setle for the signed high word), the two flags are merged, and cmov picks
; old vs. new values to feed the CMPXCHG16B retry loop.
define void @fetch_and_min(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_min:
; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG: movq (%rdi), %rax
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK: cmpq %rsi, %rax
; CHECK: setbe [[CMP:%[a-z0-9]+]]
; CHECK: cmpq [[INCHI]], %rdx
; CHECK: setle [[HICMP:%[a-z0-9]+]]
; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
; CHECK: movb [[HICMP]], [[CMP]]
; CHECK: [[USE_LO]]:
; CHECK: testb [[CMP]], [[CMP]]
; CHECK: movq %rsi, %rbx
; CHECK: cmovneq %rax, %rbx
; CHECK: movq [[INCHI]], %rcx
; CHECK: cmovneq %rdx, %rcx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
; CHECK: jne [[LOOP]]
; CHECK: movq %rax, _var
; CHECK: movq %rdx, _var+8
%val = atomicrmw min i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
}
; 128-bit signed max: like min, but with the inverted conditions (setae for
; the low word, setge for the signed high word) selecting the larger value
; before the CMPXCHG16B retry loop.
define void @fetch_and_max(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_max:
; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG: movq (%rdi), %rax
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK: cmpq %rsi, %rax
; CHECK: setae [[CMP:%[a-z0-9]+]]
; CHECK: cmpq [[INCHI]], %rdx
; CHECK: setge [[HICMP:%[a-z0-9]+]]
; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
; CHECK: movb [[HICMP]], [[CMP]]
; CHECK: [[USE_LO]]:
; CHECK: testb [[CMP]], [[CMP]]
; CHECK: movq %rsi, %rbx
; CHECK: cmovneq %rax, %rbx
; CHECK: movq [[INCHI]], %rcx
; CHECK: cmovneq %rdx, %rcx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
; CHECK: jne [[LOOP]]
; CHECK: movq %rax, _var
; CHECK: movq %rdx, _var+8
%val = atomicrmw max i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
}
; 128-bit unsigned min: same shape as the signed version, but the high-word
; comparison uses the unsigned condition (setbe) as well.
define void @fetch_and_umin(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umin:
; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG: movq (%rdi), %rax
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK: cmpq %rsi, %rax
; CHECK: setbe [[CMP:%[a-z0-9]+]]
; CHECK: cmpq [[INCHI]], %rdx
; CHECK: setbe [[HICMP:%[a-z0-9]+]]
; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
; CHECK: movb [[HICMP]], [[CMP]]
; CHECK: [[USE_LO]]:
; CHECK: testb [[CMP]], [[CMP]]
; CHECK: movq %rsi, %rbx
; CHECK: cmovneq %rax, %rbx
; CHECK: movq [[INCHI]], %rcx
; CHECK: cmovneq %rdx, %rcx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
; CHECK: jne [[LOOP]]
; CHECK: movq %rax, _var
; CHECK: movq %rdx, _var+8
%val = atomicrmw umin i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
}
; 128-bit unsigned max: note the low-word compare has its operands the other
; way around (cmpq %rax, %rsi / setb) relative to the other min/max tests,
; with seta for the unsigned high word.
define void @fetch_and_umax(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umax:
; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG: movq (%rdi), %rax
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK: cmpq %rax, %rsi
; CHECK: setb [[CMP:%[a-z0-9]+]]
; CHECK: cmpq [[INCHI]], %rdx
; CHECK: seta [[HICMP:%[a-z0-9]+]]
; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
; CHECK: movb [[HICMP]], [[CMP]]
; CHECK: [[USE_LO]]:
; CHECK: testb [[CMP]], [[CMP]]
; CHECK: movq %rsi, %rbx
; CHECK: cmovneq %rax, %rbx
; CHECK: movq [[INCHI]], %rcx
; CHECK: cmovneq %rdx, %rcx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
; CHECK: jne [[LOOP]]
; CHECK: movq %rax, _var
; CHECK: movq %rdx, _var+8
%val = atomicrmw umax i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
}
; A 16-byte atomic load is implemented as CMPXCHG16B with a zeroed
; expected/replacement pair; the loaded value comes back in RDX:RAX.
define i128 @atomic_load_seq_cst(i128* %p) {
; CHECK-LABEL: atomic_load_seq_cst:
; CHECK: xorl %eax, %eax
; CHECK: xorl %edx, %edx
; CHECK: xorl %ebx, %ebx
; CHECK: xorl %ecx, %ecx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
%r = load atomic i128* %p seq_cst, align 16
ret i128 %r
}
; Even a monotonic (relaxed) 16-byte load needs the CMPXCHG16B trick; no
; plain load is atomic at this width.
define i128 @atomic_load_relaxed(i128* %p) {
; CHECK-LABEL: atomic_load_relaxed:
; CHECK: xorl %eax, %eax
; CHECK: xorl %edx, %edx
; CHECK: xorl %ebx, %ebx
; CHECK: xorl %ecx, %ecx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
%r = load atomic i128* %p monotonic, align 16
ret i128 %r
}
; A 16-byte atomic store is a CMPXCHG16B loop: read the current value into
; RDX:RAX, then retry the exchange (jne) until nothing raced with us.
define void @atomic_store_seq_cst(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_seq_cst:
; CHECK: movq %rdx, %rcx
; CHECK: movq %rsi, %rbx
; CHECK: movq (%rdi), %rax
; CHECK: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
; CHECK: jne [[LOOP]]
store atomic i128 %in, i128* %p seq_cst, align 16
ret void
}
; Release ordering makes no difference at this width: still a CMPXCHG16B
; retry loop.
define void @atomic_store_release(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_release:
; CHECK: movq %rdx, %rcx
; CHECK: movq %rsi, %rbx
; CHECK: movq (%rdi), %rax
; CHECK: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
; CHECK: jne [[LOOP]]
store atomic i128 %in, i128* %p release, align 16
ret void
}
; Weakest ordering (unordered) still requires the CMPXCHG16B retry loop for
; a 16-byte store.
define void @atomic_store_relaxed(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_relaxed:
; CHECK: movq %rdx, %rcx
; CHECK: movq %rsi, %rbx
; CHECK: movq (%rdi), %rax
; CHECK: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
; CHECK: jne [[LOOP]]
store atomic i128 %in, i128* %p unordered, align 16
ret void
}

View File

@ -4,8 +4,8 @@
@sc16 = external global i16
define void @atomic_fetch_add16() nounwind {
; X64: atomic_fetch_add16
; X32: atomic_fetch_add16
; X64-LABEL: atomic_fetch_add16
; X32-LABEL: atomic_fetch_add16
entry:
; 32-bit
%t1 = atomicrmw add i16* @sc16, i16 1 acquire
@ -34,8 +34,8 @@ entry:
}
define void @atomic_fetch_sub16() nounwind {
; X64: atomic_fetch_sub16
; X32: atomic_fetch_sub16
; X64-LABEL: atomic_fetch_sub16
; X32-LABEL: atomic_fetch_sub16
%t1 = atomicrmw sub i16* @sc16, i16 1 acquire
; X64: lock
; X64: decw
@ -62,18 +62,18 @@ define void @atomic_fetch_sub16() nounwind {
}
define void @atomic_fetch_and16() nounwind {
; X64: atomic_fetch_and16
; X32: atomic_fetch_and16
; X64-LABEL: atomic_fetch_and16
; X32-LABEL: atomic_fetch_and16
%t1 = atomicrmw and i16* @sc16, i16 3 acquire
; X64: lock
; X64: andw $3, {{.*}} # encoding: [0xf0,0x66
; X32: lock
; X32: andw $3
%t2 = atomicrmw and i16* @sc16, i16 5 acquire
; X64: andw
; X64: andl
; X64: lock
; X64: cmpxchgw
; X32: andw
; X32: andl
; X32: lock
; X32: cmpxchgw
%t3 = atomicrmw and i16* @sc16, i16 %t2 acquire
@ -87,18 +87,18 @@ define void @atomic_fetch_and16() nounwind {
}
define void @atomic_fetch_or16() nounwind {
; X64: atomic_fetch_or16
; X32: atomic_fetch_or16
; X64-LABEL: atomic_fetch_or16
; X32-LABEL: atomic_fetch_or16
%t1 = atomicrmw or i16* @sc16, i16 3 acquire
; X64: lock
; X64: orw $3, {{.*}} # encoding: [0xf0,0x66
; X32: lock
; X32: orw $3
%t2 = atomicrmw or i16* @sc16, i16 5 acquire
; X64: orw
; X64: orl
; X64: lock
; X64: cmpxchgw
; X32: orw
; X32: orl
; X32: lock
; X32: cmpxchgw
%t3 = atomicrmw or i16* @sc16, i16 %t2 acquire
@ -112,18 +112,18 @@ define void @atomic_fetch_or16() nounwind {
}
define void @atomic_fetch_xor16() nounwind {
; X64: atomic_fetch_xor16
; X32: atomic_fetch_xor16
; X64-LABEL: atomic_fetch_xor16
; X32-LABEL: atomic_fetch_xor16
%t1 = atomicrmw xor i16* @sc16, i16 3 acquire
; X64: lock
; X64: xorw $3, {{.*}} # encoding: [0xf0,0x66
; X32: lock
; X32: xorw $3
%t2 = atomicrmw xor i16* @sc16, i16 5 acquire
; X64: xorw
; X64: xorl
; X64: lock
; X64: cmpxchgw
; X32: xorw
; X32: xorl
; X32: lock
; X32: cmpxchgw
%t3 = atomicrmw xor i16* @sc16, i16 %t2 acquire
@ -137,15 +137,15 @@ define void @atomic_fetch_xor16() nounwind {
}
define void @atomic_fetch_nand16(i16 %x) nounwind {
; X64: atomic_fetch_nand16
; X32: atomic_fetch_nand16
; X64-LABEL: atomic_fetch_nand16
; X32-LABEL: atomic_fetch_nand16
%t1 = atomicrmw nand i16* @sc16, i16 %x acquire
; X64: andw
; X64: notw
; X64: andl
; X64: notl
; X64: lock
; X64: cmpxchgw
; X32: andw
; X32: notw
; X32: andl
; X32: notl
; X32: lock
; X32: cmpxchgw
ret void
@ -155,12 +155,16 @@ define void @atomic_fetch_nand16(i16 %x) nounwind {
define void @atomic_fetch_max16(i16 %x) nounwind {
%t1 = atomicrmw max i16* @sc16, i16 %x acquire
; X64: cmpw
; X64: movswl
; X64: movswl
; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgw
; X32: cmpw
; X32: movswl
; X32: movswl
; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgw
@ -171,12 +175,16 @@ define void @atomic_fetch_max16(i16 %x) nounwind {
define void @atomic_fetch_min16(i16 %x) nounwind {
%t1 = atomicrmw min i16* @sc16, i16 %x acquire
; X64: cmpw
; X64: movswl
; X64: movswl
; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgw
; X32: cmpw
; X32: movswl
; X32: movswl
; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgw
@ -187,12 +195,16 @@ define void @atomic_fetch_min16(i16 %x) nounwind {
define void @atomic_fetch_umax16(i16 %x) nounwind {
%t1 = atomicrmw umax i16* @sc16, i16 %x acquire
; X64: cmpw
; X64: movzwl
; X64: movzwl
; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgw
; X32: cmpw
; X32: movzwl
; X32: movzwl
; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgw
@ -203,11 +215,16 @@ define void @atomic_fetch_umax16(i16 %x) nounwind {
define void @atomic_fetch_umin16(i16 %x) nounwind {
%t1 = atomicrmw umin i16* @sc16, i16 %x acquire
; X64: cmpw
; X64: movzwl
; X64: movzwl
; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgw
; X32: cmpw
; X32: movzwl
; X32: movzwl
; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgw

View File

@ -5,8 +5,8 @@
@sc32 = external global i32
define void @atomic_fetch_add32() nounwind {
; X64: atomic_fetch_add32
; X32: atomic_fetch_add32
; X64-LABEL: atomic_fetch_add32:
; X32-LABEL: atomic_fetch_add32:
entry:
; 32-bit
%t1 = atomicrmw add i32* @sc32, i32 1 acquire
@ -35,8 +35,8 @@ entry:
}
define void @atomic_fetch_sub32() nounwind {
; X64: atomic_fetch_sub32
; X32: atomic_fetch_sub32
; X64-LABEL: atomic_fetch_sub32:
; X32-LABEL: atomic_fetch_sub32:
%t1 = atomicrmw sub i32* @sc32, i32 1 acquire
; X64: lock
; X64: decl
@ -63,8 +63,8 @@ define void @atomic_fetch_sub32() nounwind {
}
define void @atomic_fetch_and32() nounwind {
; X64: atomic_fetch_and32
; X32: atomic_fetch_and32
; X64-LABEL: atomic_fetch_and32:
; X32-LABEL: atomic_fetch_and32:
%t1 = atomicrmw and i32* @sc32, i32 3 acquire
; X64: lock
; X64: andl $3
@ -88,8 +88,8 @@ define void @atomic_fetch_and32() nounwind {
}
define void @atomic_fetch_or32() nounwind {
; X64: atomic_fetch_or32
; X32: atomic_fetch_or32
; X64-LABEL: atomic_fetch_or32:
; X32-LABEL: atomic_fetch_or32:
%t1 = atomicrmw or i32* @sc32, i32 3 acquire
; X64: lock
; X64: orl $3
@ -113,8 +113,8 @@ define void @atomic_fetch_or32() nounwind {
}
define void @atomic_fetch_xor32() nounwind {
; X64: atomic_fetch_xor32
; X32: atomic_fetch_xor32
; X64-LABEL: atomic_fetch_xor32:
; X32-LABEL: atomic_fetch_xor32:
%t1 = atomicrmw xor i32* @sc32, i32 3 acquire
; X64: lock
; X64: xorl $3
@ -138,8 +138,8 @@ define void @atomic_fetch_xor32() nounwind {
}
define void @atomic_fetch_nand32(i32 %x) nounwind {
; X64: atomic_fetch_nand32
; X32: atomic_fetch_nand32
; X64-LABEL: atomic_fetch_nand32:
; X32-LABEL: atomic_fetch_nand32:
%t1 = atomicrmw nand i32* @sc32, i32 %x acquire
; X64: andl
; X64: notl
@ -155,19 +155,22 @@ define void @atomic_fetch_nand32(i32 %x) nounwind {
}
define void @atomic_fetch_max32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_max32:
; X32-LABEL: atomic_fetch_max32:
%t1 = atomicrmw max i32* @sc32, i32 %x acquire
; X64: cmpl
; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgl
; X32: cmpl
; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgl
; NOCMOV: cmpl
; NOCMOV: jl
; NOCMOV: subl
; NOCMOV: jge
; NOCMOV: lock
; NOCMOV: cmpxchgl
ret void
@ -177,19 +180,23 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
}
define void @atomic_fetch_min32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_min32:
; X32-LABEL: atomic_fetch_min32:
; NOCMOV-LABEL: atomic_fetch_min32:
%t1 = atomicrmw min i32* @sc32, i32 %x acquire
; X64: cmpl
; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgl
; X32: cmpl
; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgl
; NOCMOV: cmpl
; NOCMOV: jg
; NOCMOV: subl
; NOCMOV: jle
; NOCMOV: lock
; NOCMOV: cmpxchgl
ret void
@ -199,40 +206,22 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
}
define void @atomic_fetch_umax32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_umax32:
; X32-LABEL: atomic_fetch_umax32:
; NOCMOV-LABEL: atomic_fetch_umax32:
%t1 = atomicrmw umax i32* @sc32, i32 %x acquire
; X64: cmpl
; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgl
; X32: cmpl
; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgl
; NOCMOV: cmpl
; NOCMOV: jb
; NOCMOV: lock
; NOCMOV: cmpxchgl
ret void
; X64: ret
; X32: ret
; NOCMOV: ret
}
define void @atomic_fetch_umin32(i32 %x) nounwind {
%t1 = atomicrmw umin i32* @sc32, i32 %x acquire
; X64: cmpl
; X64: cmov
; X64: lock
; X64: cmpxchgl
; X32: cmpl
; X32: cmov
; X32: lock
; X32: cmpxchgl
; NOCMOV: cmpl
; NOCMOV: subl
; NOCMOV: ja
; NOCMOV: lock
; NOCMOV: cmpxchgl
@ -242,7 +231,36 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
; NOCMOV: ret
}
define void @atomic_fetch_umin32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_umin32:
; X32-LABEL: atomic_fetch_umin32:
; NOCMOV-LABEL: atomic_fetch_umin32:
%t1 = atomicrmw umin i32* @sc32, i32 %x acquire
; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgl
; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgl
; NOCMOV: subl
; NOCMOV: jb
; NOCMOV: lock
; NOCMOV: cmpxchgl
ret void
; X64: ret
; X32: ret
; NOCMOV: ret
}
define void @atomic_fetch_cmpxchg32() nounwind {
; X64-LABEL: atomic_fetch_cmpxchg32:
; X32-LABEL: atomic_fetch_cmpxchg32:
%t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire acquire
; X64: lock
; X64: cmpxchgl
@ -254,6 +272,9 @@ define void @atomic_fetch_cmpxchg32() nounwind {
}
define void @atomic_fetch_store32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_store32:
; X32-LABEL: atomic_fetch_store32:
store atomic i32 %x, i32* @sc32 release, align 4
; X64-NOT: lock
; X64: movl
@ -265,6 +286,9 @@ define void @atomic_fetch_store32(i32 %x) nounwind {
}
define void @atomic_fetch_swap32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_swap32:
; X32-LABEL: atomic_fetch_swap32:
%t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
; X64-NOT: lock
; X64: xchgl

View File

@ -3,7 +3,8 @@
@sc64 = external global i64
define void @atomic_fetch_add64() nounwind {
; X64: atomic_fetch_add64
; X64-LABEL: atomic_fetch_add64:
; X32-LABEL: atomic_fetch_add64:
entry:
%t1 = atomicrmw add i64* @sc64, i64 1 acquire
; X64: lock
@ -22,7 +23,8 @@ entry:
}
define void @atomic_fetch_sub64() nounwind {
; X64: atomic_fetch_sub64
; X64-LABEL: atomic_fetch_sub64:
; X32-LABEL: atomic_fetch_sub64:
%t1 = atomicrmw sub i64* @sc64, i64 1 acquire
; X64: lock
; X64: decq
@ -40,7 +42,8 @@ define void @atomic_fetch_sub64() nounwind {
}
define void @atomic_fetch_and64() nounwind {
; X64: atomic_fetch_and64
; X64-LABEL: atomic_fetch_and64:
; X32-LABEL: atomic_fetch_and64:
%t1 = atomicrmw and i64* @sc64, i64 3 acquire
; X64: lock
; X64: andq $3
@ -56,7 +59,8 @@ define void @atomic_fetch_and64() nounwind {
}
define void @atomic_fetch_or64() nounwind {
; X64: atomic_fetch_or64
; X64-LABEL: atomic_fetch_or64:
; X32-LABEL: atomic_fetch_or64:
%t1 = atomicrmw or i64* @sc64, i64 3 acquire
; X64: lock
; X64: orq $3
@ -72,7 +76,8 @@ define void @atomic_fetch_or64() nounwind {
}
define void @atomic_fetch_xor64() nounwind {
; X64: atomic_fetch_xor64
; X64-LABEL: atomic_fetch_xor64:
; X32-LABEL: atomic_fetch_xor64:
%t1 = atomicrmw xor i64* @sc64, i64 3 acquire
; X64: lock
; X64: xorq $3
@ -88,8 +93,8 @@ define void @atomic_fetch_xor64() nounwind {
}
define void @atomic_fetch_nand64(i64 %x) nounwind {
; X64: atomic_fetch_nand64
; X32: atomic_fetch_nand64
; X64-LABEL: atomic_fetch_nand64:
; X32-LABEL: atomic_fetch_nand64:
%t1 = atomicrmw nand i64* @sc64, i64 %x acquire
; X64: andq
; X64: notq
@ -107,8 +112,10 @@ define void @atomic_fetch_nand64(i64 %x) nounwind {
}
define void @atomic_fetch_max64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_max64:
; X32-LABEL: atomic_fetch_max64:
%t1 = atomicrmw max i64* @sc64, i64 %x acquire
; X64: cmpq
; X64: subq
; X64: cmov
; X64: lock
; X64: cmpxchgq
@ -126,8 +133,10 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
}
define void @atomic_fetch_min64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_min64:
; X32-LABEL: atomic_fetch_min64:
%t1 = atomicrmw min i64* @sc64, i64 %x acquire
; X64: cmpq
; X64: subq
; X64: cmov
; X64: lock
; X64: cmpxchgq
@ -145,8 +154,10 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
}
define void @atomic_fetch_umax64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_umax64:
; X32-LABEL: atomic_fetch_umax64:
%t1 = atomicrmw umax i64* @sc64, i64 %x acquire
; X64: cmpq
; X64: subq
; X64: cmov
; X64: lock
; X64: cmpxchgq
@ -164,8 +175,10 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
}
define void @atomic_fetch_umin64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_umin64:
; X32-LABEL: atomic_fetch_umin64:
%t1 = atomicrmw umin i64* @sc64, i64 %x acquire
; X64: cmpq
; X64: subq
; X64: cmov
; X64: lock
; X64: cmpxchgq
@ -183,6 +196,8 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
}
define void @atomic_fetch_cmpxchg64() nounwind {
; X64-LABEL: atomic_fetch_cmpxchg64:
; X32-LABEL: atomic_fetch_cmpxchg64:
%t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
; X64: lock
; X64: cmpxchgq
@ -194,6 +209,8 @@ define void @atomic_fetch_cmpxchg64() nounwind {
}
define void @atomic_fetch_store64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_store64:
; X32-LABEL: atomic_fetch_store64:
store atomic i64 %x, i64* @sc64 release, align 8
; X64-NOT: lock
; X64: movq
@ -205,6 +222,8 @@ define void @atomic_fetch_store64(i64 %x) nounwind {
}
define void @atomic_fetch_swap64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_swap64:
; X32-LABEL: atomic_fetch_swap64:
%t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
; X64-NOT: lock
; X64: xchgq

View File

@ -3,7 +3,8 @@
@sc64 = external global i64
define void @atomic_fetch_add64() nounwind {
; X32: atomic_fetch_add64
; X64-LABEL: atomic_fetch_add64:
; X32-LABEL: atomic_fetch_add64:
entry:
%t1 = atomicrmw add i64* @sc64, i64 1 acquire
; X32: addl
@ -30,20 +31,21 @@ entry:
}
define void @atomic_fetch_sub64() nounwind {
; X32: atomic_fetch_sub64
; X64-LABEL: atomic_fetch_sub64:
; X32-LABEL: atomic_fetch_sub64:
%t1 = atomicrmw sub i64* @sc64, i64 1 acquire
; X32: subl
; X32: sbbl
; X32: addl $-1
; X32: adcl $-1
; X32: lock
; X32: cmpxchg8b
%t2 = atomicrmw sub i64* @sc64, i64 3 acquire
; X32: subl
; X32: sbbl
; X32: addl $-3
; X32: adcl $-1
; X32: lock
; X32: cmpxchg8b
%t3 = atomicrmw sub i64* @sc64, i64 5 acquire
; X32: subl
; X32: sbbl
; X32: addl $-5
; X32: adcl $-1
; X32: lock
; X32: cmpxchg8b
%t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire
@ -56,15 +58,16 @@ define void @atomic_fetch_sub64() nounwind {
}
define void @atomic_fetch_and64() nounwind {
; X32: atomic_fetch_and64
; X64-LABEL: atomic_fetch_and64:
; X32-LABEL: atomic_fetch_and64:
%t1 = atomicrmw and i64* @sc64, i64 3 acquire
; X32: andl
; X32: andl
; X32: andl $3
; X32-NOT: andl
; X32: lock
; X32: cmpxchg8b
%t2 = atomicrmw and i64* @sc64, i64 5 acquire
; X32: andl
; X32: andl
%t2 = atomicrmw and i64* @sc64, i64 4294967297 acquire
; X32: andl $1
; X32: andl $1
; X32: lock
; X32: cmpxchg8b
%t3 = atomicrmw and i64* @sc64, i64 %t2 acquire
@ -77,15 +80,16 @@ define void @atomic_fetch_and64() nounwind {
}
define void @atomic_fetch_or64() nounwind {
; X32: atomic_fetch_or64
; X64-LABEL: atomic_fetch_or64:
; X32-LABEL: atomic_fetch_or64:
%t1 = atomicrmw or i64* @sc64, i64 3 acquire
; X32: orl
; X32: orl
; X32: orl $3
; X32-NOT: orl
; X32: lock
; X32: cmpxchg8b
%t2 = atomicrmw or i64* @sc64, i64 5 acquire
; X32: orl
; X32: orl
%t2 = atomicrmw or i64* @sc64, i64 4294967297 acquire
; X32: orl $1
; X32: orl $1
; X32: lock
; X32: cmpxchg8b
%t3 = atomicrmw or i64* @sc64, i64 %t2 acquire
@ -98,15 +102,16 @@ define void @atomic_fetch_or64() nounwind {
}
define void @atomic_fetch_xor64() nounwind {
; X32: atomic_fetch_xor64
; X64-LABEL: atomic_fetch_xor64:
; X32-LABEL: atomic_fetch_xor64:
%t1 = atomicrmw xor i64* @sc64, i64 3 acquire
; X32: xorl
; X32: xorl
; X32-NOT: xorl
; X32: lock
; X32: cmpxchg8b
%t2 = atomicrmw xor i64* @sc64, i64 5 acquire
; X32: xorl
; X32: xorl
%t2 = atomicrmw xor i64* @sc64, i64 4294967297 acquire
; X32: xorl $1
; X32: xorl $1
; X32: lock
; X32: cmpxchg8b
%t3 = atomicrmw xor i64* @sc64, i64 %t2 acquire
@ -119,7 +124,8 @@ define void @atomic_fetch_xor64() nounwind {
}
define void @atomic_fetch_nand64(i64 %x) nounwind {
; X32: atomic_fetch_nand64
; X64-LABEL: atomic_fetch_nand64:
; X32-LABEL: atomic_fetch_nand64:
%t1 = atomicrmw nand i64* @sc64, i64 %x acquire
; X32: andl
; X32: andl
@ -132,10 +138,11 @@ define void @atomic_fetch_nand64(i64 %x) nounwind {
}
define void @atomic_fetch_max64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_max64:
; X32-LABEL: atomic_fetch_max64:
%t1 = atomicrmw max i64* @sc64, i64 %x acquire
; X32: cmpl
; X32: cmpl
; X32: cmov
; X32: subl
; X32: subl
; X32: cmov
; X32: cmov
; X32: lock
@ -145,10 +152,11 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
}
define void @atomic_fetch_min64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_min64:
; X32-LABEL: atomic_fetch_min64:
%t1 = atomicrmw min i64* @sc64, i64 %x acquire
; X32: cmpl
; X32: cmpl
; X32: cmov
; X32: subl
; X32: subl
; X32: cmov
; X32: cmov
; X32: lock
@ -158,10 +166,11 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
}
define void @atomic_fetch_umax64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_umax64:
; X32-LABEL: atomic_fetch_umax64:
%t1 = atomicrmw umax i64* @sc64, i64 %x acquire
; X32: cmpl
; X32: cmpl
; X32: cmov
; X32: subl
; X32: subl
; X32: cmov
; X32: cmov
; X32: lock
@ -171,10 +180,11 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
}
define void @atomic_fetch_umin64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_umin64:
; X32-LABEL: atomic_fetch_umin64:
%t1 = atomicrmw umin i64* @sc64, i64 %x acquire
; X32: cmpl
; X32: cmpl
; X32: cmov
; X32: subl
; X32: subl
; X32: cmov
; X32: cmov
; X32: lock
@ -184,6 +194,8 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
}
define void @atomic_fetch_cmpxchg64() nounwind {
; X64-LABEL: atomic_fetch_cmpxchg64:
; X32-LABEL: atomic_fetch_cmpxchg64:
%t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
; X32: lock
; X32: cmpxchg8b
@ -192,6 +204,8 @@ define void @atomic_fetch_cmpxchg64() nounwind {
}
define void @atomic_fetch_store64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_store64:
; X32-LABEL: atomic_fetch_store64:
store atomic i64 %x, i64* @sc64 release, align 8
; X32: lock
; X32: cmpxchg8b
@ -200,6 +214,8 @@ define void @atomic_fetch_store64(i64 %x) nounwind {
}
define void @atomic_fetch_swap64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_swap64:
; X32-LABEL: atomic_fetch_swap64:
%t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
; X32: lock
; X32: cmpxchg8b

View File

@ -4,8 +4,8 @@
@sc8 = external global i8
define void @atomic_fetch_add8() nounwind {
; X64: atomic_fetch_add8
; X32: atomic_fetch_add8
; X64-LABEL: atomic_fetch_add8:
; X32-LABEL: atomic_fetch_add8:
entry:
; 32-bit
%t1 = atomicrmw add i8* @sc8, i8 1 acquire
@ -34,8 +34,8 @@ entry:
}
define void @atomic_fetch_sub8() nounwind {
; X64: atomic_fetch_sub8
; X32: atomic_fetch_sub8
; X64-LABEL: atomic_fetch_sub8:
; X32-LABEL: atomic_fetch_sub8:
%t1 = atomicrmw sub i8* @sc8, i8 1 acquire
; X64: lock
; X64: decb
@ -62,8 +62,8 @@ define void @atomic_fetch_sub8() nounwind {
}
define void @atomic_fetch_and8() nounwind {
; X64: atomic_fetch_and8
; X32: atomic_fetch_and8
; X64-LABEL: atomic_fetch_and8:
; X32-LABEL: atomic_fetch_and8:
%t1 = atomicrmw and i8* @sc8, i8 3 acquire
; X64: lock
; X64: andb $3
@ -87,8 +87,8 @@ define void @atomic_fetch_and8() nounwind {
}
define void @atomic_fetch_or8() nounwind {
; X64: atomic_fetch_or8
; X32: atomic_fetch_or8
; X64-LABEL: atomic_fetch_or8:
; X32-LABEL: atomic_fetch_or8:
%t1 = atomicrmw or i8* @sc8, i8 3 acquire
; X64: lock
; X64: orb $3
@ -112,8 +112,8 @@ define void @atomic_fetch_or8() nounwind {
}
define void @atomic_fetch_xor8() nounwind {
; X64: atomic_fetch_xor8
; X32: atomic_fetch_xor8
; X64-LABEL: atomic_fetch_xor8:
; X32-LABEL: atomic_fetch_xor8:
%t1 = atomicrmw xor i8* @sc8, i8 3 acquire
; X64: lock
; X64: xorb $3
@ -137,8 +137,8 @@ define void @atomic_fetch_xor8() nounwind {
}
define void @atomic_fetch_nand8(i8 %x) nounwind {
; X64: atomic_fetch_nand8
; X32: atomic_fetch_nand8
; X64-LABEL: atomic_fetch_nand8:
; X32-LABEL: atomic_fetch_nand8:
%t1 = atomicrmw nand i8* @sc8, i8 %x acquire
; X64: andb
; X64: notb
@ -154,14 +154,18 @@ define void @atomic_fetch_nand8(i8 %x) nounwind {
}
define void @atomic_fetch_max8(i8 %x) nounwind {
; X64-LABEL: atomic_fetch_max8:
; X32-LABEL: atomic_fetch_max8:
%t1 = atomicrmw max i8* @sc8, i8 %x acquire
; X64: cmpb
; X64: cmov
; X64: movsbl
; X64: movsbl
; X64: subl
; X64: lock
; X64: cmpxchgb
; X32: cmpb
; X32: cmov
; X32: movsbl
; X32: movsbl
; X32: subl
; X32: lock
; X32: cmpxchgb
ret void
@ -170,14 +174,18 @@ define void @atomic_fetch_max8(i8 %x) nounwind {
}
define void @atomic_fetch_min8(i8 %x) nounwind {
; X64-LABEL: atomic_fetch_min8:
; X32-LABEL: atomic_fetch_min8:
%t1 = atomicrmw min i8* @sc8, i8 %x acquire
; X64: cmpb
; X64: cmov
; X64: movsbl
; X64: movsbl
; X64: subl
; X64: lock
; X64: cmpxchgb
; X32: cmpb
; X32: cmov
; X32: movsbl
; X32: movsbl
; X32: subl
; X32: lock
; X32: cmpxchgb
ret void
@ -186,14 +194,18 @@ define void @atomic_fetch_min8(i8 %x) nounwind {
}
define void @atomic_fetch_umax8(i8 %x) nounwind {
; X64-LABEL: atomic_fetch_umax8:
; X32-LABEL: atomic_fetch_umax8:
%t1 = atomicrmw umax i8* @sc8, i8 %x acquire
; X64: cmpb
; X64: cmov
; X64: movzbl
; X64: movzbl
; X64: subl
; X64: lock
; X64: cmpxchgb
; X32: cmpb
; X32: cmov
; X32: movzbl
; X32: movzbl
; X32: subl
; X32: lock
; X32: cmpxchgb
ret void
@ -202,13 +214,18 @@ define void @atomic_fetch_umax8(i8 %x) nounwind {
}
define void @atomic_fetch_umin8(i8 %x) nounwind {
; X64-LABEL: atomic_fetch_umin8:
; X32-LABEL: atomic_fetch_umin8:
%t1 = atomicrmw umin i8* @sc8, i8 %x acquire
; X64: cmpb
; X64: cmov
; X64: movzbl
; X64: movzbl
; X64: subl
; X64: lock
; X64: cmpxchgb
; X32: cmpb
; X32: cmov
; X32: movzbl
; X32: movzbl
; X32: subl
; X32: lock
; X32: cmpxchgb
ret void
@ -217,6 +234,8 @@ define void @atomic_fetch_umin8(i8 %x) nounwind {
}
define void @atomic_fetch_cmpxchg8() nounwind {
; X64-LABEL: atomic_fetch_cmpxchg8:
; X32-LABEL: atomic_fetch_cmpxchg8:
%t1 = cmpxchg i8* @sc8, i8 0, i8 1 acquire acquire
; X64: lock
; X64: cmpxchgb
@ -228,6 +247,8 @@ define void @atomic_fetch_cmpxchg8() nounwind {
}
define void @atomic_fetch_store8(i8 %x) nounwind {
; X64-LABEL: atomic_fetch_store8:
; X32-LABEL: atomic_fetch_store8:
store atomic i8 %x, i8* @sc8 release, align 4
; X64-NOT: lock
; X64: movb
@ -239,6 +260,8 @@ define void @atomic_fetch_store8(i8 %x) nounwind {
}
define void @atomic_fetch_swap8(i8 %x) nounwind {
; X64-LABEL: atomic_fetch_swap8:
; X32-LABEL: atomic_fetch_swap8:
%t1 = atomicrmw xchg i8* @sc8, i8 %x acquire
; X64-NOT: lock
; X64: xchgb

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov,cx16 -verify-machineinstrs | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
@ -110,19 +110,19 @@ entry:
%17 = extractvalue { i32, i1 } %pair17, 0
store i32 %17, i32* %old
; CHECK: movl [[R17atomic:.*]], %eax
; CHECK: movl $1401, %[[R17mask:[a-z]*]]
; CHECK: andl %eax, %[[R17mask]]
; CHECK: notl %[[R17mask]]
; CHECK: movl %eax, %[[R17mask:[a-z]*]]
; CHECK: notl %[[R17mask]]
; CHECK: orl $-1402, %[[R17mask]]
; CHECK: lock
; CHECK: cmpxchgl %[[R17mask]], [[R17atomic]]
; CHECK: jne
; CHECK: movl %eax,
%18 = atomicrmw nand i32* %val2, i32 1401 monotonic
store i32 %18, i32* %old
; CHECK: andl
; CHECK: andl
; CHECK: notl
; CHECK: notl
; CHECK: orl $252645135
; CHECK: orl $252645135
; CHECK: lock
; CHECK: cmpxchg8b
%19 = atomicrmw nand i64* %temp64, i64 17361641481138401520 monotonic

View File

@ -5,29 +5,29 @@ define void @atomic_maxmin_i8() {
; CHECK: atomic_maxmin_i8
%1 = atomicrmw max i8* @sc8, i8 5 acquire
; CHECK: [[LABEL1:\.?LBB[0-9]+_[0-9]+]]:
; CHECK: cmpb
; CHECK: cmovl
; CHECK: movsbl
; CHECK: cmpl
; CHECK: lock
; CHECK-NEXT: cmpxchgb
; CHECK: jne [[LABEL1]]
%2 = atomicrmw min i8* @sc8, i8 6 acquire
; CHECK: [[LABEL3:\.?LBB[0-9]+_[0-9]+]]:
; CHECK: cmpb
; CHECK: cmovg
; CHECK: movsbl
; CHECK: cmpl
; CHECK: lock
; CHECK-NEXT: cmpxchgb
; CHECK: jne [[LABEL3]]
%3 = atomicrmw umax i8* @sc8, i8 7 acquire
; CHECK: [[LABEL5:\.?LBB[0-9]+_[0-9]+]]:
; CHECK: cmpb
; CHECK: cmovb
; CHECK: movzbl
; CHECK: cmpl
; CHECK: lock
; CHECK-NEXT: cmpxchgb
; CHECK: jne [[LABEL5]]
%4 = atomicrmw umin i8* @sc8, i8 8 acquire
; CHECK: [[LABEL7:\.?LBB[0-9]+_[0-9]+]]:
; CHECK: cmpb
; CHECK: cmova
; CHECK: movzbl
; CHECK: cmpl
; CHECK: lock
; CHECK-NEXT: cmpxchgb
; CHECK: jne [[LABEL7]]