
ARM64: switch to IR-based atomic operations.

Goodbye code!

(Game: spot the bug fixed by the change).

llvm-svn: 206490
Tim Northover 2014-04-17 20:00:33 +00:00
parent d47e9a6e0d
commit 77edcc9a3a
6 changed files with 165 additions and 878 deletions
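
Note: the change drops the target-specific ATOMIC_* pseudo-instructions (expanded late in EmitInstrWithCustomInserter) in favour of a generic IR-level expansion driven by the new hooks added below: shouldExpandAtomicInIR, emitLoadLinked and emitStoreConditional. The following is a minimal sketch of how such an expansion can rewrite an atomicrmw into a load-linked/store-conditional loop; the function name expandAtomicRMWToLLSC and the surrounding structure are illustrative assumptions, not the exact pass that landed in LLVM.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

// Sketch only: rewrite "atomicrmw add" into
//   loop:  %old = ldxr ptr ; %new = add %old, %val ; %status = stxr %new, ptr
//          br (%status != 0), loop, exit
static bool expandAtomicRMWToLLSC(AtomicRMWInst *AI, const TargetLowering *TLI) {
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  AtomicOrdering Ord = AI->getOrdering();

  // entry -> loop -> exit; a failed store-conditional branches back to loop.
  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
  BB->getTerminator()->eraseFromParent();
  BranchInst::Create(LoopBB, BB);

  IRBuilder<> Builder(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, AI->getPointerOperand(), Ord);
  // Only the Add operation is shown; other atomicrmw operations differ only
  // in the instruction(s) built here.
  Value *NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
  Value *Status =
      TLI->emitStoreConditional(Builder, NewVal, AI->getPointerOperand(), Ord);
  // The store-conditional returns 0 on success; retry otherwise.
  Value *TryAgain = Builder.CreateICmpNE(
      Status, ConstantInt::get(Status->getType(), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  // atomicrmw yields the old value.
  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}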

View File

@@ -157,9 +157,6 @@ public:
SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
SDNode *SelectAtomic(SDNode *Node, unsigned Op8, unsigned Op16, unsigned Op32,
unsigned Op64);
SDNode *SelectBitfieldExtractOp(SDNode *N);
SDNode *SelectBitfieldInsertOp(SDNode *N);
@@ -1138,37 +1135,6 @@ SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
return St;
}
SDNode *ARM64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
unsigned Op16, unsigned Op32,
unsigned Op64) {
// Mostly direct translation to the given operations, except that we preserve
// the AtomicOrdering for use later on.
AtomicSDNode *AN = cast<AtomicSDNode>(Node);
EVT VT = AN->getMemoryVT();
unsigned Op;
if (VT == MVT::i8)
Op = Op8;
else if (VT == MVT::i16)
Op = Op16;
else if (VT == MVT::i32)
Op = Op32;
else if (VT == MVT::i64)
Op = Op64;
else
llvm_unreachable("Unexpected atomic operation");
SmallVector<SDValue, 4> Ops;
for (unsigned i = 1; i < AN->getNumOperands(); ++i)
Ops.push_back(AN->getOperand(i));
Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
Ops.push_back(AN->getOperand(0)); // Chain moves to the end
return CurDAG->SelectNodeTo(Node, Op, AN->getValueType(0), MVT::Other,
&Ops[0], Ops.size());
}
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
unsigned &Opc, SDValue &Opd0,
unsigned &LSB, unsigned &MSB,
@@ -1829,54 +1795,6 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) {
return I;
break;
case ISD::ATOMIC_LOAD_ADD:
return SelectAtomic(Node, ARM64::ATOMIC_LOAD_ADD_I8,
ARM64::ATOMIC_LOAD_ADD_I16, ARM64::ATOMIC_LOAD_ADD_I32,
ARM64::ATOMIC_LOAD_ADD_I64);
case ISD::ATOMIC_LOAD_SUB:
return SelectAtomic(Node, ARM64::ATOMIC_LOAD_SUB_I8,
ARM64::ATOMIC_LOAD_SUB_I16, ARM64::ATOMIC_LOAD_SUB_I32,
ARM64::ATOMIC_LOAD_SUB_I64);
case ISD::ATOMIC_LOAD_AND:
return SelectAtomic(Node, ARM64::ATOMIC_LOAD_AND_I8,
ARM64::ATOMIC_LOAD_AND_I16, ARM64::ATOMIC_LOAD_AND_I32,
ARM64::ATOMIC_LOAD_AND_I64);
case ISD::ATOMIC_LOAD_OR:
return SelectAtomic(Node, ARM64::ATOMIC_LOAD_OR_I8,
ARM64::ATOMIC_LOAD_OR_I16, ARM64::ATOMIC_LOAD_OR_I32,
ARM64::ATOMIC_LOAD_OR_I64);
case ISD::ATOMIC_LOAD_XOR:
return SelectAtomic(Node, ARM64::ATOMIC_LOAD_XOR_I8,
ARM64::ATOMIC_LOAD_XOR_I16, ARM64::ATOMIC_LOAD_XOR_I32,
ARM64::ATOMIC_LOAD_XOR_I64);
case ISD::ATOMIC_LOAD_NAND:
return SelectAtomic(
Node, ARM64::ATOMIC_LOAD_NAND_I8, ARM64::ATOMIC_LOAD_NAND_I16,
ARM64::ATOMIC_LOAD_NAND_I32, ARM64::ATOMIC_LOAD_NAND_I64);
case ISD::ATOMIC_LOAD_MIN:
return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MIN_I8,
ARM64::ATOMIC_LOAD_MIN_I16, ARM64::ATOMIC_LOAD_MIN_I32,
ARM64::ATOMIC_LOAD_MIN_I64);
case ISD::ATOMIC_LOAD_MAX:
return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MAX_I8,
ARM64::ATOMIC_LOAD_MAX_I16, ARM64::ATOMIC_LOAD_MAX_I32,
ARM64::ATOMIC_LOAD_MAX_I64);
case ISD::ATOMIC_LOAD_UMIN:
return SelectAtomic(
Node, ARM64::ATOMIC_LOAD_UMIN_I8, ARM64::ATOMIC_LOAD_UMIN_I16,
ARM64::ATOMIC_LOAD_UMIN_I32, ARM64::ATOMIC_LOAD_UMIN_I64);
case ISD::ATOMIC_LOAD_UMAX:
return SelectAtomic(
Node, ARM64::ATOMIC_LOAD_UMAX_I8, ARM64::ATOMIC_LOAD_UMAX_I16,
ARM64::ATOMIC_LOAD_UMAX_I32, ARM64::ATOMIC_LOAD_UMAX_I64);
case ISD::ATOMIC_SWAP:
return SelectAtomic(Node, ARM64::ATOMIC_SWAP_I8, ARM64::ATOMIC_SWAP_I16,
ARM64::ATOMIC_SWAP_I32, ARM64::ATOMIC_SWAP_I64);
case ISD::ATOMIC_CMP_SWAP:
return SelectAtomic(Node, ARM64::ATOMIC_CMP_SWAP_I8,
ARM64::ATOMIC_CMP_SWAP_I16, ARM64::ATOMIC_CMP_SWAP_I32,
ARM64::ATOMIC_CMP_SWAP_I64);
case ISD::LOAD: {
// Try to select as an indexed load. Fall through to normal processing
// if we can't.

View File

@@ -222,26 +222,6 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
// 128-bit atomics
setOperationAction(ISD::ATOMIC_SWAP, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i128, Custom);
// These are surprisingly difficult. The only single-copy atomic 128-bit
// instruction on AArch64 is stxp (when it succeeds). So a store can safely
// become a simple swap, but a load can only be determined to have been atomic
// if storing the same value back succeeds.
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Expand);
// Variable arguments.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Custom);
@@ -706,437 +686,6 @@ const char *ARM64TargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
unsigned &LdrOpc, unsigned &StrOpc) {
static unsigned LoadBares[] = { ARM64::LDXRB, ARM64::LDXRH, ARM64::LDXRW,
ARM64::LDXRX, ARM64::LDXPX };
static unsigned LoadAcqs[] = { ARM64::LDAXRB, ARM64::LDAXRH, ARM64::LDAXRW,
ARM64::LDAXRX, ARM64::LDAXPX };
static unsigned StoreBares[] = { ARM64::STXRB, ARM64::STXRH, ARM64::STXRW,
ARM64::STXRX, ARM64::STXPX };
static unsigned StoreRels[] = { ARM64::STLXRB, ARM64::STLXRH, ARM64::STLXRW,
ARM64::STLXRX, ARM64::STLXPX };
unsigned *LoadOps, *StoreOps;
if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
LoadOps = LoadAcqs;
else
LoadOps = LoadBares;
if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
StoreOps = StoreRels;
else
StoreOps = StoreBares;
assert(isPowerOf2_32(Size) && Size <= 16 &&
"unsupported size for atomic binary op!");
LdrOpc = LoadOps[Log2_32(Size)];
StrOpc = StoreOps[Log2_32(Size)];
}
MachineBasicBlock *ARM64TargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size) const {
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned oldval = MI->getOperand(2).getReg();
unsigned newval = MI->getOperand(3).getReg();
AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
unsigned scratch = BB->getParent()->getRegInfo().createVirtualRegister(
&ARM64::GPR32RegClass);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
// FIXME: We currently always generate a seq_cst operation; we should
// be able to relax this in some cases.
unsigned ldrOpc, strOpc;
getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It; // insert the new blocks after the current block
MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
// ...
// fallthrough --> loop1MBB
BB->addSuccessor(loop1MBB);
// loop1MBB:
// ldrex dest, [ptr]
// cmp dest, oldval
// bne exitMBB
BB = loop1MBB;
BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
BuildMI(BB, dl, TII->get(Size == 8 ? ARM64::SUBSXrr : ARM64::SUBSWrr))
.addReg(Size == 8 ? ARM64::XZR : ARM64::WZR, RegState::Define)
.addReg(dest)
.addReg(oldval);
BuildMI(BB, dl, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(exitMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(exitMBB);
// loop2MBB:
// strex scratch, newval, [ptr]
// cmp scratch, #0
// bne loop1MBB
BB = loop2MBB;
BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loop1MBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
MachineBasicBlock *
ARM64TargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *MF = BB->getParent();
MachineFunction::iterator It = BB;
++It;
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
unsigned ldrOpc, strOpc;
getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = MF->getRegInfo();
unsigned scratch = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass);
unsigned scratch2 =
(!BinOpcode)
? incr
: RegInfo.createVirtualRegister(Size == 8 ? &ARM64::GPR64RegClass
: &ARM64::GPR32RegClass);
// thisMBB:
// ...
// fallthrough --> loopMBB
BB->addSuccessor(loopMBB);
// loopMBB:
// ldxr dest, ptr
// <binop> scratch2, dest, incr
// stxr scratch, scratch2, ptr
// cbnz scratch, loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
if (BinOpcode) {
// operand order needs to go the other way for NAND
if (BinOpcode == ARM64::BICWrr || BinOpcode == ARM64::BICXrr)
BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(incr).addReg(dest);
else
BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(dest).addReg(incr);
}
BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
MachineBasicBlock *ARM64TargetLowering::EmitAtomicBinary128(
MachineInstr *MI, MachineBasicBlock *BB, unsigned BinOpcodeLo,
unsigned BinOpcodeHi) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *MF = BB->getParent();
MachineFunction::iterator It = BB;
++It;
unsigned DestLo = MI->getOperand(0).getReg();
unsigned DestHi = MI->getOperand(1).getReg();
unsigned Ptr = MI->getOperand(2).getReg();
unsigned IncrLo = MI->getOperand(3).getReg();
unsigned IncrHi = MI->getOperand(4).getReg();
AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(5).getImm());
DebugLoc DL = MI->getDebugLoc();
unsigned LdrOpc, StrOpc;
getExclusiveOperation(16, Ord, LdrOpc, StrOpc);
MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, LoopMBB);
MF->insert(It, ExitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
ExitMBB->splice(ExitMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
ExitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = MF->getRegInfo();
unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass);
unsigned ScratchLo = IncrLo, ScratchHi = IncrHi;
if (BinOpcodeLo) {
assert(BinOpcodeHi && "Expect neither or both opcodes to be defined");
ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
}
// ThisMBB:
// ...
// fallthrough --> LoopMBB
BB->addSuccessor(LoopMBB);
// LoopMBB:
// ldxp DestLo, DestHi, Ptr
// <binoplo> ScratchLo, DestLo, IncrLo
// <binophi> ScratchHi, DestHi, IncrHi
// stxp ScratchRes, ScratchLo, ScratchHi, ptr
// cbnz ScratchRes, LoopMBB
// fallthrough --> ExitMBB
BB = LoopMBB;
BuildMI(BB, DL, TII->get(LdrOpc), DestLo)
.addReg(DestHi, RegState::Define)
.addReg(Ptr);
if (BinOpcodeLo) {
// operand order needs to go the other way for NAND
if (BinOpcodeLo == ARM64::BICXrr) {
std::swap(IncrLo, DestLo);
std::swap(IncrHi, DestHi);
}
BuildMI(BB, DL, TII->get(BinOpcodeLo), ScratchLo).addReg(DestLo).addReg(
IncrLo);
BuildMI(BB, DL, TII->get(BinOpcodeHi), ScratchHi).addReg(DestHi).addReg(
IncrHi);
}
BuildMI(BB, DL, TII->get(StrOpc), ScratchRes)
.addReg(ScratchLo)
.addReg(ScratchHi)
.addReg(Ptr);
BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB);
BB->addSuccessor(LoopMBB);
BB->addSuccessor(ExitMBB);
// ExitMBB:
// ...
BB = ExitMBB;
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
MachineBasicBlock *
ARM64TargetLowering::EmitAtomicCmpSwap128(MachineInstr *MI,
MachineBasicBlock *BB) const {
unsigned DestLo = MI->getOperand(0).getReg();
unsigned DestHi = MI->getOperand(1).getReg();
unsigned Ptr = MI->getOperand(2).getReg();
unsigned OldValLo = MI->getOperand(3).getReg();
unsigned OldValHi = MI->getOperand(4).getReg();
unsigned NewValLo = MI->getOperand(5).getReg();
unsigned NewValHi = MI->getOperand(6).getReg();
AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(7).getImm());
unsigned ScratchRes = BB->getParent()->getRegInfo().createVirtualRegister(
&ARM64::GPR32RegClass);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned LdrOpc, StrOpc;
getExclusiveOperation(16, Ord, LdrOpc, StrOpc);
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It; // insert the new blocks after the current block
MachineBasicBlock *Loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *Loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, Loop1MBB);
MF->insert(It, Loop2MBB);
MF->insert(It, ExitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
ExitMBB->splice(ExitMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
ExitMBB->transferSuccessorsAndUpdatePHIs(BB);
// ThisMBB:
// ...
// fallthrough --> Loop1MBB
BB->addSuccessor(Loop1MBB);
// Loop1MBB:
// ldxp DestLo, DestHi, [Ptr]
// cmp DestLo, OldValLo
// sbc xzr, DestHi, OldValHi
// bne ExitMBB
BB = Loop1MBB;
BuildMI(BB, DL, TII->get(LdrOpc), DestLo)
.addReg(DestHi, RegState::Define)
.addReg(Ptr);
BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg(
OldValLo);
BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg(
OldValHi);
BuildMI(BB, DL, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(ExitMBB);
BB->addSuccessor(Loop2MBB);
BB->addSuccessor(ExitMBB);
// Loop2MBB:
// stxp ScratchRes, NewValLo, NewValHi, [Ptr]
// cbnz ScratchRes, Loop1MBB
BB = Loop2MBB;
BuildMI(BB, DL, TII->get(StrOpc), ScratchRes)
.addReg(NewValLo)
.addReg(NewValHi)
.addReg(Ptr);
BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(Loop1MBB);
BB->addSuccessor(Loop1MBB);
BB->addSuccessor(ExitMBB);
// ExitMBB:
// ...
BB = ExitMBB;
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
MachineBasicBlock *ARM64TargetLowering::EmitAtomicMinMax128(
MachineInstr *MI, MachineBasicBlock *BB, unsigned CondCode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *MF = BB->getParent();
MachineFunction::iterator It = BB;
++It;
unsigned DestLo = MI->getOperand(0).getReg();
unsigned DestHi = MI->getOperand(1).getReg();
unsigned Ptr = MI->getOperand(2).getReg();
unsigned IncrLo = MI->getOperand(3).getReg();
unsigned IncrHi = MI->getOperand(4).getReg();
AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(5).getImm());
DebugLoc DL = MI->getDebugLoc();
unsigned LdrOpc, StrOpc;
getExclusiveOperation(16, Ord, LdrOpc, StrOpc);
MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, LoopMBB);
MF->insert(It, ExitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
ExitMBB->splice(ExitMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
ExitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = MF->getRegInfo();
unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass);
unsigned ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
unsigned ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
// ThisMBB:
// ...
// fallthrough --> LoopMBB
BB->addSuccessor(LoopMBB);
// LoopMBB:
// ldxp DestLo, DestHi, Ptr
// cmp ScratchLo, DestLo, IncrLo
// sbc xzr, ScratchHi, DestHi, IncrHi
// csel ScratchLo, DestLo, IncrLo, <cmp-op>
// csel ScratchHi, DestHi, IncrHi, <cmp-op>
// stxp ScratchRes, ScratchLo, ScratchHi, ptr
// cbnz ScratchRes, LoopMBB
// fallthrough --> ExitMBB
BB = LoopMBB;
BuildMI(BB, DL, TII->get(LdrOpc), DestLo)
.addReg(DestHi, RegState::Define)
.addReg(Ptr);
BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg(
IncrLo);
BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg(
IncrHi);
BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchLo)
.addReg(DestLo)
.addReg(IncrLo)
.addImm(CondCode);
BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchHi)
.addReg(DestHi)
.addReg(IncrHi)
.addImm(CondCode);
BuildMI(BB, DL, TII->get(StrOpc), ScratchRes)
.addReg(ScratchLo)
.addReg(ScratchHi)
.addReg(Ptr);
BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB);
BB->addSuccessor(LoopMBB);
BB->addSuccessor(ExitMBB);
// ExitMBB:
// ...
BB = ExitMBB;
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
MachineBasicBlock *
ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI,
MachineBasicBlock *MBB) const {
@@ -1209,106 +758,6 @@ ARM64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
assert(0 && "Unexpected instruction for custom inserter!");
break;
case ARM64::ATOMIC_LOAD_ADD_I8:
return EmitAtomicBinary(MI, BB, 1, ARM64::ADDWrr);
case ARM64::ATOMIC_LOAD_ADD_I16:
return EmitAtomicBinary(MI, BB, 2, ARM64::ADDWrr);
case ARM64::ATOMIC_LOAD_ADD_I32:
return EmitAtomicBinary(MI, BB, 4, ARM64::ADDWrr);
case ARM64::ATOMIC_LOAD_ADD_I64:
return EmitAtomicBinary(MI, BB, 8, ARM64::ADDXrr);
case ARM64::ATOMIC_LOAD_ADD_I128:
return EmitAtomicBinary128(MI, BB, ARM64::ADDSXrr, ARM64::ADCXr);
case ARM64::ATOMIC_LOAD_AND_I8:
return EmitAtomicBinary(MI, BB, 1, ARM64::ANDWrr);
case ARM64::ATOMIC_LOAD_AND_I16:
return EmitAtomicBinary(MI, BB, 2, ARM64::ANDWrr);
case ARM64::ATOMIC_LOAD_AND_I32:
return EmitAtomicBinary(MI, BB, 4, ARM64::ANDWrr);
case ARM64::ATOMIC_LOAD_AND_I64:
return EmitAtomicBinary(MI, BB, 8, ARM64::ANDXrr);
case ARM64::ATOMIC_LOAD_AND_I128:
return EmitAtomicBinary128(MI, BB, ARM64::ANDXrr, ARM64::ANDXrr);
case ARM64::ATOMIC_LOAD_OR_I8:
return EmitAtomicBinary(MI, BB, 1, ARM64::ORRWrr);
case ARM64::ATOMIC_LOAD_OR_I16:
return EmitAtomicBinary(MI, BB, 2, ARM64::ORRWrr);
case ARM64::ATOMIC_LOAD_OR_I32:
return EmitAtomicBinary(MI, BB, 4, ARM64::ORRWrr);
case ARM64::ATOMIC_LOAD_OR_I64:
return EmitAtomicBinary(MI, BB, 8, ARM64::ORRXrr);
case ARM64::ATOMIC_LOAD_OR_I128:
return EmitAtomicBinary128(MI, BB, ARM64::ORRXrr, ARM64::ORRXrr);
case ARM64::ATOMIC_LOAD_XOR_I8:
return EmitAtomicBinary(MI, BB, 1, ARM64::EORWrr);
case ARM64::ATOMIC_LOAD_XOR_I16:
return EmitAtomicBinary(MI, BB, 2, ARM64::EORWrr);
case ARM64::ATOMIC_LOAD_XOR_I32:
return EmitAtomicBinary(MI, BB, 4, ARM64::EORWrr);
case ARM64::ATOMIC_LOAD_XOR_I64:
return EmitAtomicBinary(MI, BB, 8, ARM64::EORXrr);
case ARM64::ATOMIC_LOAD_XOR_I128:
return EmitAtomicBinary128(MI, BB, ARM64::EORXrr, ARM64::EORXrr);
case ARM64::ATOMIC_LOAD_NAND_I8:
return EmitAtomicBinary(MI, BB, 1, ARM64::BICWrr);
case ARM64::ATOMIC_LOAD_NAND_I16:
return EmitAtomicBinary(MI, BB, 2, ARM64::BICWrr);
case ARM64::ATOMIC_LOAD_NAND_I32:
return EmitAtomicBinary(MI, BB, 4, ARM64::BICWrr);
case ARM64::ATOMIC_LOAD_NAND_I64:
return EmitAtomicBinary(MI, BB, 8, ARM64::BICXrr);
case ARM64::ATOMIC_LOAD_NAND_I128:
return EmitAtomicBinary128(MI, BB, ARM64::BICXrr, ARM64::BICXrr);
case ARM64::ATOMIC_LOAD_SUB_I8:
return EmitAtomicBinary(MI, BB, 1, ARM64::SUBWrr);
case ARM64::ATOMIC_LOAD_SUB_I16:
return EmitAtomicBinary(MI, BB, 2, ARM64::SUBWrr);
case ARM64::ATOMIC_LOAD_SUB_I32:
return EmitAtomicBinary(MI, BB, 4, ARM64::SUBWrr);
case ARM64::ATOMIC_LOAD_SUB_I64:
return EmitAtomicBinary(MI, BB, 8, ARM64::SUBXrr);
case ARM64::ATOMIC_LOAD_SUB_I128:
return EmitAtomicBinary128(MI, BB, ARM64::SUBSXrr, ARM64::SBCXr);
case ARM64::ATOMIC_LOAD_MIN_I128:
return EmitAtomicMinMax128(MI, BB, ARM64CC::LT);
case ARM64::ATOMIC_LOAD_MAX_I128:
return EmitAtomicMinMax128(MI, BB, ARM64CC::GT);
case ARM64::ATOMIC_LOAD_UMIN_I128:
return EmitAtomicMinMax128(MI, BB, ARM64CC::CC);
case ARM64::ATOMIC_LOAD_UMAX_I128:
return EmitAtomicMinMax128(MI, BB, ARM64CC::HI);
case ARM64::ATOMIC_SWAP_I8:
return EmitAtomicBinary(MI, BB, 1, 0);
case ARM64::ATOMIC_SWAP_I16:
return EmitAtomicBinary(MI, BB, 2, 0);
case ARM64::ATOMIC_SWAP_I32:
return EmitAtomicBinary(MI, BB, 4, 0);
case ARM64::ATOMIC_SWAP_I64:
return EmitAtomicBinary(MI, BB, 8, 0);
case ARM64::ATOMIC_SWAP_I128:
return EmitAtomicBinary128(MI, BB, 0, 0);
case ARM64::ATOMIC_CMP_SWAP_I8:
return EmitAtomicCmpSwap(MI, BB, 1);
case ARM64::ATOMIC_CMP_SWAP_I16:
return EmitAtomicCmpSwap(MI, BB, 2);
case ARM64::ATOMIC_CMP_SWAP_I32:
return EmitAtomicCmpSwap(MI, BB, 4);
case ARM64::ATOMIC_CMP_SWAP_I64:
return EmitAtomicCmpSwap(MI, BB, 8);
case ARM64::ATOMIC_CMP_SWAP_I128:
return EmitAtomicCmpSwap128(MI, BB);
case ARM64::F128CSEL:
return EmitF128CSEL(MI, BB);
@@ -7476,113 +6925,12 @@ bool ARM64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
return true;
}
/// The only 128-bit atomic operation is an stxp that succeeds. In particular
/// neither ldp nor ldxp are atomic. So the canonical sequence for an atomic
/// load is:
/// loop:
/// ldxp x0, x1, [x8]
/// stxp w2, x0, x1, [x8]
/// cbnz w2, loop
/// If the stxp succeeds then the ldxp managed to get both halves without an
/// intervening stxp from a different thread and the read was atomic.
static void ReplaceATOMIC_LOAD_128(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) {
SDLoc DL(N);
AtomicSDNode *AN = cast<AtomicSDNode>(N);
EVT VT = AN->getMemoryVT();
SDValue Zero = DAG.getConstant(0, VT);
// FIXME: Really want ATOMIC_LOAD_NOP but that doesn't fit into the existing
// scheme very well. Given the complexity of what we're already generating, an
// extra couple of ORRs probably won't make much difference.
SDValue Result = DAG.getAtomic(ISD::ATOMIC_LOAD_OR, DL, AN->getMemoryVT(),
N->getOperand(0), N->getOperand(1), Zero,
AN->getMemOperand(), AN->getOrdering(),
AN->getSynchScope());
Results.push_back(Result.getValue(0)); // Value
Results.push_back(Result.getValue(1)); // Chain
}
static void ReplaceATOMIC_OP_128(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp) {
SDLoc DL(N);
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
assert(N->getValueType(0) == MVT::i128 &&
"Only know how to expand i128 atomics");
SmallVector<SDValue, 6> Ops;
Ops.push_back(N->getOperand(1)); // Ptr
// Low part of Val1
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
N->getOperand(2), DAG.getIntPtrConstant(0)));
// High part of Val1
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
N->getOperand(2), DAG.getIntPtrConstant(1)));
if (NewOp == ARM64::ATOMIC_CMP_SWAP_I128) {
// Low part of Val2
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
N->getOperand(3), DAG.getIntPtrConstant(0)));
// High part of Val2
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
N->getOperand(3), DAG.getIntPtrConstant(1)));
}
Ops.push_back(DAG.getTargetConstant(Ordering, MVT::i32));
Ops.push_back(N->getOperand(0)); // Chain
SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
SDNode *Result = DAG.getMachineNode(NewOp, DL, Tys, Ops);
SDValue OpsF[] = { SDValue(Result, 0), SDValue(Result, 1) };
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, OpsF, 2));
Results.push_back(SDValue(Result, 2));
}
void ARM64TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this");
case ISD::ATOMIC_LOAD:
ReplaceATOMIC_LOAD_128(N, Results, DAG);
return;
case ISD::ATOMIC_LOAD_ADD:
ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_ADD_I128);
return;
case ISD::ATOMIC_LOAD_SUB:
ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_SUB_I128);
return;
case ISD::ATOMIC_LOAD_AND:
ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_AND_I128);
return;
case ISD::ATOMIC_LOAD_OR:
ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_OR_I128);
return;
case ISD::ATOMIC_LOAD_XOR:
ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_XOR_I128);
return;
case ISD::ATOMIC_LOAD_NAND:
ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_NAND_I128);
return;
case ISD::ATOMIC_SWAP:
ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_SWAP_I128);
return;
case ISD::ATOMIC_LOAD_MIN:
ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MIN_I128);
return;
case ISD::ATOMIC_LOAD_MAX:
ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MAX_I128);
return;
case ISD::ATOMIC_LOAD_UMIN:
ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMIN_I128);
return;
case ISD::ATOMIC_LOAD_UMAX:
ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMAX_I128);
return;
case ISD::ATOMIC_CMP_SWAP:
ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_CMP_SWAP_I128);
return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
@@ -7590,3 +6938,85 @@ void ARM64TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
}
bool ARM64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
// Loads and stores less than 128-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong:
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128;
else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
return LI->getType()->getPrimitiveSizeInBits() == 128;
// For the real atomic operations, we have ldxr/stxr up to 128 bits.
return Inst->getType()->getPrimitiveSizeInBits() <= 128;
}
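
A minimal sketch of how a caller might consult this hook to decide between IR-level expansion and the default lowering; maybeExpandAtomic is a hypothetical name, and expandAtomicRMWToLLSC refers to the illustrative helper sketched near the top of this page.

#include "llvm/IR/Instructions.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

// Hypothetical helper assumed from the earlier sketch.
bool expandAtomicRMWToLLSC(AtomicRMWInst *AI, const TargetLowering *TLI);

// Sketch only: gate the IR-level expansion on the hook above. Only atomic
// instructions are considered; everything else keeps the normal lowering.
static bool maybeExpandAtomic(Instruction *I, const TargetLowering *TLI) {
  bool IsAtomic = isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I);
  if (LoadInst *LI = dyn_cast<LoadInst>(I))
    IsAtomic = LI->isAtomic();
  else if (StoreInst *SI = dyn_cast<StoreInst>(I))
    IsAtomic = SI->isAtomic();
  if (!IsAtomic || !TLI->shouldExpandAtomicInIR(I))
    return false; // leave it; normal lowering (or a libcall) handles it
  if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(I))
    return expandAtomicRMWToLLSC(AI, TLI);
  return false; // loads, stores and cmpxchg need their own expansions
}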
Value *ARM64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
bool IsAcquire =
Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
// Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
// intrinsic must return {i64, i64} and we have to recombine them into a
// single i128 here.
if (ValTy->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::arm64_ldaxp : Intrinsic::arm64_ldxp;
Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int =
IsAcquire ? Intrinsic::arm64_ldaxr : Intrinsic::arm64_ldxr;
Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys);
return Builder.CreateTruncOrBitCast(
Builder.CreateCall(Ldxr, Addr),
cast<PointerType>(Addr->getType())->getElementType());
}
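
The recombination built above is just Lo | (Hi << 64) on the i128 value; written as plain C++ (assuming a host compiler with the __int128 extension) it amounts to:

#include <cstdint>

// Equivalent of the IR built above: widen both halves and merge them.
unsigned __int128 combineHalves(uint64_t Lo, uint64_t Hi) {
  return ((unsigned __int128)Hi << 64) | Lo;
}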
Value *ARM64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Value *Val, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsRelease =
Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
// Since the intrinsics must have legal type, the i128 intrinsics take two
// parameters: "i64, i64". We must marshal Val into the appropriate form
// before the call.
if (Val->getType()->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsRelease ? Intrinsic::arm64_stlxp : Intrinsic::arm64_stxp;
Function *Stxr = Intrinsic::getDeclaration(M, Int);
Type *Int64Ty = Type::getInt64Ty(M->getContext());
Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
return Builder.CreateCall3(Stxr, Lo, Hi, Addr);
}
Intrinsic::ID Int =
IsRelease ? Intrinsic::arm64_stlxr : Intrinsic::arm64_stxr;
Type *Tys[] = { Addr->getType() };
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
return Builder.CreateCall2(
Stxr, Builder.CreateZExtOrBitCast(
Val, Stxr->getFunctionType()->getParamType(0)),
Addr);
}
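
Conversely, the marshalling above splits the i128 value into the two i64 halves that stxp/stlxp expect; in plain C++ terms (again assuming __int128 support) it is:

#include <cstdint>

// Equivalent of the trunc/lshr pair built above.
void splitHalves(unsigned __int128 Val, uint64_t &Lo, uint64_t &Hi) {
  Lo = (uint64_t)Val;         // trunc to the low 64 bits
  Hi = (uint64_t)(Val >> 64); // lshr by 64 then trunc gives the high half
}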

View File

@@ -233,19 +233,6 @@ public:
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode) const;
MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size) const;
MachineBasicBlock *EmitAtomicBinary128(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned BinOpcodeLo,
unsigned BinOpcodeHi) const;
MachineBasicBlock *EmitAtomicCmpSwap128(MachineInstr *MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitAtomicMinMax128(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned CondCode) const;
MachineBasicBlock *EmitF128CSEL(MachineInstr *MI,
MachineBasicBlock *BB) const;
@@ -293,9 +280,18 @@ public:
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
bool shouldExpandAtomicInIR(Instruction *Inst) const override;
private:
/// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
/// make the right decision when generating code for different targets.

View File

@@ -140,71 +140,6 @@ def : Pat<(relaxed_store<atomic_store_64> am_indexed64:$ptr, GPR64:$val),
def : Pat<(relaxed_store<atomic_store_64> am_unscaled64:$ptr, GPR64:$val),
(STURXi GPR64:$val, am_unscaled64:$ptr)>;
//===----------------------------------
// Atomic read-modify-write operations
//===----------------------------------
// More complicated operations need lots of C++ support, so we just create
// skeletons here for the C++ code to refer to.
let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
multiclass AtomicSizes {
def _I8 : Pseudo<(outs GPR32:$dst),
(ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
def _I16 : Pseudo<(outs GPR32:$dst),
(ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
def _I32 : Pseudo<(outs GPR32:$dst),
(ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
def _I64 : Pseudo<(outs GPR64:$dst),
(ins GPR64sp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
def _I128 : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
(ins GPR64sp:$ptr, GPR64:$incrlo, GPR64:$incrhi,
i32imm:$ordering), []>;
}
}
defm ATOMIC_LOAD_ADD : AtomicSizes;
defm ATOMIC_LOAD_SUB : AtomicSizes;
defm ATOMIC_LOAD_AND : AtomicSizes;
defm ATOMIC_LOAD_OR : AtomicSizes;
defm ATOMIC_LOAD_XOR : AtomicSizes;
defm ATOMIC_LOAD_NAND : AtomicSizes;
defm ATOMIC_SWAP : AtomicSizes;
let Defs = [CPSR] in {
// These operations need a CMP to calculate the correct value
defm ATOMIC_LOAD_MIN : AtomicSizes;
defm ATOMIC_LOAD_MAX : AtomicSizes;
defm ATOMIC_LOAD_UMIN : AtomicSizes;
defm ATOMIC_LOAD_UMAX : AtomicSizes;
}
class AtomicCmpSwap<RegisterClass GPRData>
: Pseudo<(outs GPRData:$dst),
(ins GPR64sp:$ptr, GPRData:$old, GPRData:$new,
i32imm:$ordering), []> {
let usesCustomInserter = 1;
let hasCtrlDep = 1;
let mayLoad = 1;
let mayStore = 1;
let Defs = [CPSR];
}
def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>;
def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
def ATOMIC_CMP_SWAP_I128
: Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
(ins GPR64sp:$ptr, GPR64:$oldlo, GPR64:$oldhi,
GPR64:$newlo, GPR64:$newhi, i32imm:$ordering), []> {
let usesCustomInserter = 1;
let hasCtrlDep = 1;
let mayLoad = 1;
let mayStore = 1;
let Defs = [CPSR];
}
//===----------------------------------
// Low-level exclusive operations
//===----------------------------------

View File

@@ -5,13 +5,14 @@
define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
; CHECK-LABEL: val_compare_and_swap:
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxp [[RESULTLO:x[0-9]+]], [[RESULTHI:x[0-9]+]], [x0]
; CHECK: cmp [[RESULTLO]], x2
; CHECK: sbc xzr, [[RESULTHI]], x3
; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
; CHECK: stxp [[SCRATCH_RES:w[0-9]+]], x4, x5, [x0]
; CHECK: ldaxp [[RESULTLO:x[0-9]+]], [[RESULTHI:x[0-9]+]], [x[[ADDR:[0-9]+]]]
; CHECK-DAG: eor [[MISMATCH_LO:x[0-9]+]], [[RESULTLO]], x2
; CHECK-DAG: eor [[MISMATCH_HI:x[0-9]+]], [[RESULTHI]], x3
; CHECK: orr [[MISMATCH:x[0-9]+]], [[MISMATCH_LO]], [[MISMATCH_HI]]
; CHECK: cbnz [[MISMATCH]], [[DONE:.LBB[0-9]+_[0-9]+]]
; CHECK: stxp [[SCRATCH_RES:w[0-9]+]], x4, x5, [x[[ADDR]]]
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
; CHECK: [[LABEL2]]:
; CHECK: [[DONE]]:
%val = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
ret i128 %val
}
@@ -20,13 +21,13 @@ define void @fetch_and_nand(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_nand:
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
; CHECK: bic [[SCRATCH_REGLO:x[0-9]+]], x2, [[DEST_REGLO]]
; CHECK: bic [[SCRATCH_REGHI:x[0-9]+]], x3, [[DEST_REGHI]]
; CHECK-DAG: bic [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
; CHECK-DAG: bic [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
; CHECK: str [[DEST_REGHI]]
; CHECK: str [[DEST_REGLO]]
; CHECK-DAG: str [[DEST_REGHI]]
; CHECK-DAG: str [[DEST_REGLO]]
%val = atomicrmw nand i128* %p, i128 %bits release
store i128 %val, i128* @var, align 16
ret void
@@ -36,13 +37,13 @@ define void @fetch_and_or(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_or:
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
; CHECK: orr [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
; CHECK: orr [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
; CHECK-DAG: orr [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
; CHECK-DAG: orr [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
; CHECK: str [[DEST_REGHI]]
; CHECK: str [[DEST_REGLO]]
; CHECK-DAG: str [[DEST_REGHI]]
; CHECK-DAG: str [[DEST_REGLO]]
%val = atomicrmw or i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
@@ -53,12 +54,12 @@ define void @fetch_and_add(i128* %p, i128 %bits) {
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
; CHECK: adds [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
; CHECK: adc [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
; CHECK: adcs [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
; CHECK: str [[DEST_REGHI]]
; CHECK: str [[DEST_REGLO]]
; CHECK-DAG: str [[DEST_REGHI]]
; CHECK-DAG: str [[DEST_REGLO]]
%val = atomicrmw add i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
@@ -69,12 +70,12 @@ define void @fetch_and_sub(i128* %p, i128 %bits) {
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
; CHECK: subs [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
; CHECK: sbc [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
; CHECK: sbcs [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
; CHECK: str [[DEST_REGHI]]
; CHECK: str [[DEST_REGLO]]
; CHECK-DAG: str [[DEST_REGHI]]
; CHECK-DAG: str [[DEST_REGLO]]
%val = atomicrmw sub i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
@@ -85,14 +86,18 @@ define void @fetch_and_min(i128* %p, i128 %bits) {
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
; CHECK: cmp [[DEST_REGLO]], x2
; CHECK: sbc xzr, [[DEST_REGHI]], x3
; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, lt
; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, lt
; CHECK: csinc [[LOCMP:w[0-9]+]], wzr, wzr, hi
; CHECK: cmp [[DEST_REGHI:x[0-9]+]], x3
; CHECK: csinc [[HICMP:w[0-9]+]], wzr, wzr, gt
; CHECK: csel [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
; CHECK: cmp [[CMP]], #0
; CHECK-DAG: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
; CHECK-DAG: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
; CHECK: str [[DEST_REGHI]]
; CHECK: str [[DEST_REGLO]]
; CHECK-DAG: str [[DEST_REGHI]]
; CHECK-DAG: str [[DEST_REGLO]]
%val = atomicrmw min i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
@@ -103,14 +108,18 @@ define void @fetch_and_max(i128* %p, i128 %bits) {
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
; CHECK: cmp [[DEST_REGLO]], x2
; CHECK: sbc xzr, [[DEST_REGHI]], x3
; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, gt
; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, gt
; CHECK: csinc [[LOCMP:w[0-9]+]], wzr, wzr, ls
; CHECK: cmp [[DEST_REGHI:x[0-9]+]], x3
; CHECK: csinc [[HICMP:w[0-9]+]], wzr, wzr, le
; CHECK: csel [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
; CHECK: cmp [[CMP]], #0
; CHECK-DAG: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
; CHECK-DAG: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
; CHECK: str [[DEST_REGHI]]
; CHECK: str [[DEST_REGLO]]
; CHECK-DAG: str [[DEST_REGHI]]
; CHECK-DAG: str [[DEST_REGLO]]
%val = atomicrmw max i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
@@ -121,14 +130,18 @@ define void @fetch_and_umin(i128* %p, i128 %bits) {
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
; CHECK: cmp [[DEST_REGLO]], x2
; CHECK: sbc xzr, [[DEST_REGHI]], x3
; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, cc
; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, cc
; CHECK: csinc [[LOCMP:w[0-9]+]], wzr, wzr, hi
; CHECK: cmp [[DEST_REGHI:x[0-9]+]], x3
; CHECK: csinc [[HICMP:w[0-9]+]], wzr, wzr, hi
; CHECK: csel [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
; CHECK: cmp [[CMP]], #0
; CHECK-DAG: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
; CHECK-DAG: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
; CHECK: str [[DEST_REGHI]]
; CHECK: str [[DEST_REGLO]]
; CHECK-DAG: str [[DEST_REGHI]]
; CHECK-DAG: str [[DEST_REGLO]]
%val = atomicrmw umin i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
@@ -139,14 +152,18 @@ define void @fetch_and_umax(i128* %p, i128 %bits) {
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
; CHECK: cmp [[DEST_REGLO]], x2
; CHECK: sbc xzr, [[DEST_REGHI]], x3
; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, hi
; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, hi
; CHECK: csinc [[LOCMP:w[0-9]+]], wzr, wzr, ls
; CHECK: cmp [[DEST_REGHI:x[0-9]+]], x3
; CHECK: csinc [[HICMP:w[0-9]+]], wzr, wzr, ls
; CHECK: csel [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
; CHECK: cmp [[CMP]], #0
; CHECK-DAG: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
; CHECK-DAG: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
; CHECK: str [[DEST_REGHI]]
; CHECK: str [[DEST_REGLO]]
; CHECK-DAG: str [[DEST_REGHI]]
; CHECK-DAG: str [[DEST_REGLO]]
%val = atomicrmw umax i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
@@ -164,12 +181,7 @@ define i128 @atomic_load_seq_cst(i128* %p) {
define i128 @atomic_load_relaxed(i128* %p) {
; CHECK-LABEL: atomic_load_relaxed:
; CHECK-NOT: dmb
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0]
; CHECK: orr [[SAMELO:x[0-9]+]], [[LO]], xzr
; CHECK: orr [[SAMEHI:x[0-9]+]], [[HI]], xzr
; CHECK: stxp [[SUCCESS:w[0-9]+]], [[SAMELO]], [[SAMEHI]], [x0]
; CHECK: cbnz [[SUCCESS]], [[LABEL]]
; CHECK-NOT: dmb
%r = load atomic i128* %p monotonic, align 16
ret i128 %r

View File

@@ -3,10 +3,9 @@
define i32 @val_compare_and_swap(i32* %p) {
; CHECK-LABEL: val_compare_and_swap:
; CHECK: orr [[NEWVAL_REG:w[0-9]+]], wzr, #0x4
; CHECK: orr [[OLDVAL_REG:w[0-9]+]], wzr, #0x7
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxr [[RESULT:w[0-9]+]], [x0]
; CHECK: cmp [[RESULT]], [[OLDVAL_REG]]
; CHECK: cmp [[RESULT]], #7
; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0]
; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
@@ -18,10 +17,9 @@ define i32 @val_compare_and_swap(i32* %p) {
define i64 @val_compare_and_swap_64(i64* %p) {
; CHECK-LABEL: val_compare_and_swap_64:
; CHECK: orr w[[NEWVAL_REG:[0-9]+]], wzr, #0x4
; CHECK: orr w[[OLDVAL_REG:[0-9]+]], wzr, #0x7
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldxr [[RESULT:x[0-9]+]], [x0]
; CHECK: cmp [[RESULT]], x[[OLDVAL_REG]]
; CHECK: cmp [[RESULT]], #7
; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
; CHECK-NOT: stxr x[[NEWVAL_REG]], x[[NEWVAL_REG]]
; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], x[[NEWVAL_REG]], [x0]
@@ -33,10 +31,9 @@ define i64 @val_compare_and_swap_64(i64* %p) {
define i32 @fetch_and_nand(i32* %p) {
; CHECK-LABEL: fetch_and_nand:
; CHECK: orr [[OLDVAL_REG:w[0-9]+]], wzr, #0x7
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0]
; CHECK: bic [[SCRATCH2_REG:w[0-9]+]], [[OLDVAL_REG]], w[[DEST_REG]]
; CHECK: and [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], #0xfffffff8
; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
@@ -47,13 +44,13 @@ define i32 @fetch_and_nand(i32* %p) {
define i64 @fetch_and_nand_64(i64* %p) {
; CHECK-LABEL: fetch_and_nand_64:
; CHECK: orr w[[OLDVAL_REG:[0-9]+]], wzr, #0x7
; CHECK: mov x[[ADDR:[0-9]+]], x0
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxr [[DEST_REG:x[0-9]+]], [x0]
; CHECK: bic [[SCRATCH2_REG:x[0-9]+]], x[[OLDVAL_REG]], [[DEST_REG]]
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
; CHECK: ldaxr [[DEST_REG:x[0-9]+]], [x[[ADDR]]]
; CHECK: and [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0xfffffffffffffff8
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
; CHECK: mov x0, [[DEST_REG]]
%val = atomicrmw nand i64* %p, i64 7 acq_rel
ret i64 %val
}
@@ -74,13 +71,12 @@ define i32 @fetch_and_or(i32* %p) {
define i64 @fetch_and_or_64(i64* %p) {
; CHECK: fetch_and_or_64:
; CHECK: orr w[[OLDVAL_REG:[0-9]+]], wzr, #0x7
; CHECK: mov x[[ADDR:[0-9]+]], x0
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x0]
; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], x[[OLDVAL_REG]]
; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x[[ADDR]]]
; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0x7
; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
; CHECK: mov x0, [[DEST_REG]]
%val = atomicrmw or i64* %p, i64 7 monotonic
ret i64 %val
}