From a7477c993d1936172db11b8b8e17b9179f6e4bf0 Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha
Date: Wed, 20 Jul 2016 21:12:32 +0000
Subject: [PATCH] [AArch64][FastISel] Select -O0 legal cmpxchg.

At -O0, cmpxchg survives AtomicExpand: it's mostly straightforward to select
it in fast-isel, and let the pseudo be expanded later.

extractvalues on the result are the tricky part: the generic logic only works
for legal types (and it would be painful to make it support illegal types), so
we can only support i32/i64 cmpxchg.

llvm-svn: 276183
---
 lib/Target/AArch64/AArch64FastISel.cpp    | 55 +++++++++++++++++
 test/CodeGen/AArch64/cmpxchg-O0.ll        |  2 +-
 test/CodeGen/AArch64/fast-isel-cmpxchg.ll | 72 +++++++++++++++++++++++
 3 files changed, 128 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/AArch64/fast-isel-cmpxchg.ll

diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index ac98e6674e1..336c21e7fa9 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -134,6 +134,7 @@ private:
   bool selectFRem(const Instruction *I);
   bool selectSDiv(const Instruction *I);
   bool selectGetElementPtr(const Instruction *I);
+  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
 
   // Utility helper routines.
   bool isTypeLegal(Type *Ty, MVT &VT);
@@ -4940,6 +4941,58 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
   return true;
 }
 
+bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
+  assert(TM.getOptLevel() == CodeGenOpt::None &&
+         "cmpxchg survived AtomicExpand at optlevel > -O0");
+
+  auto *RetPairTy = cast<StructType>(I->getType());
+  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
+  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
+         "cmpxchg has a non-i1 status result");
+
+  MVT VT;
+  if (!isTypeLegal(RetTy, VT))
+    return false;
+
+  const TargetRegisterClass *ResRC;
+  unsigned Opc;
+  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
+  // extractvalue selection doesn't support that.
+  if (VT == MVT::i32) {
+    Opc = AArch64::CMP_SWAP_32;
+    ResRC = &AArch64::GPR32RegClass;
+  } else if (VT == MVT::i64) {
+    Opc = AArch64::CMP_SWAP_64;
+    ResRC = &AArch64::GPR64RegClass;
+  } else {
+    return false;
+  }
+
+  const MCInstrDesc &II = TII.get(Opc);
+
+  const unsigned AddrReg = constrainOperandRegClass(
+      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
+  const unsigned DesiredReg = constrainOperandRegClass(
+      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
+  const unsigned NewReg = constrainOperandRegClass(
+      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
+
+  const unsigned ResultReg1 = createResultReg(ResRC);
+  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
+
+  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+      .addReg(ResultReg1, RegState::Define)
+      .addReg(ResultReg2, RegState::Define)
+      .addReg(AddrReg)
+      .addReg(DesiredReg)
+      .addReg(NewReg);
+
+  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
+  updateValueMap(I, ResultReg1, 2);
+  return true;
+}
+
 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
   switch (I->getOpcode()) {
   default:
@@ -5013,6 +5066,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
     return selectFRem(I);
   case Instruction::GetElementPtr:
     return selectGetElementPtr(I);
+  case Instruction::AtomicCmpXchg:
+    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
   }
 
   // fall-back to target-independent instruction selection.
diff --git a/test/CodeGen/AArch64/cmpxchg-O0.ll b/test/CodeGen/AArch64/cmpxchg-O0.ll
index c79d82a6377..acf2ce323e4 100644
--- a/test/CodeGen/AArch64/cmpxchg-O0.ll
+++ b/test/CodeGen/AArch64/cmpxchg-O0.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 %s -o - | FileCheck %s
 
 define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
 ; CHECK-LABEL: test_cmpxchg_8:
diff --git a/test/CodeGen/AArch64/fast-isel-cmpxchg.ll b/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
new file mode 100644
index 00000000000..d20e693df83
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
@@ -0,0 +1,72 @@
+; RUN: llc -mtriple=aarch64-- -O0 -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: cmpxchg_monotonic_32:
+; CHECK: [[RETRY:.LBB[0-9_]+]]:
+; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
+; CHECK-NEXT: cmp [[OLD]], w1
+; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
+; CHECK-NEXT: // BB#2:
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, [x0]
+; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
+; CHECK-NEXT: [[DONE]]:
+; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
+; CHECK-NEXT: str [[STATUS32]], [x3]
+; CHECK-NEXT: mov w0, [[OLD]]
+; CHECK-NEXT: ret
+define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 {
+  %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new monotonic monotonic
+  %tmp1 = extractvalue { i32, i1 } %tmp0, 0
+  %tmp2 = extractvalue { i32, i1 } %tmp0, 1
+  %tmp3 = zext i1 %tmp2 to i32
+  store i32 %tmp3, i32* %ps
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: cmpxchg_acq_rel_32_load:
+; CHECK: // BB#0:
+; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2]
+; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]:
+; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
+; CHECK-NEXT: cmp [[OLD]], w1
+; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
+; CHECK-NEXT: // BB#2:
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0]
+; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
+; CHECK-NEXT: [[DONE]]:
+; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
+; CHECK-NEXT: str [[STATUS32]], [x3]
+; CHECK-NEXT: mov w0, [[OLD]]
+; CHECK-NEXT: ret
+define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0 {
+  %new = load i32, i32* %pnew
+  %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel acquire
+  %tmp1 = extractvalue { i32, i1 } %tmp0, 0
+  %tmp2 = extractvalue { i32, i1 } %tmp0, 1
+  %tmp3 = zext i1 %tmp2 to i32
+  store i32 %tmp3, i32* %ps
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: cmpxchg_seq_cst_64:
+; CHECK: [[RETRY:.LBB[0-9_]+]]:
+; CHECK-NEXT: ldaxr [[OLD:x[0-9]+]], [x0]
+; CHECK-NEXT: cmp [[OLD]], x1
+; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
+; CHECK-NEXT: // BB#2:
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], x2, [x0]
+; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
+; CHECK-NEXT: [[DONE]]:
+; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
+; CHECK-NEXT: str [[STATUS32]], [x3]
+; CHECK-NEXT: mov x0, [[OLD]]
+; CHECK-NEXT: ret
+define i64 @cmpxchg_seq_cst_64(i64* %p, i64 %cmp, i64 %new, i32* %ps) #0 {
+  %tmp0 = cmpxchg i64* %p, i64 %cmp, i64 %new seq_cst seq_cst
+  %tmp1 = extractvalue { i64, i1 } %tmp0, 0
+  %tmp2 = extractvalue { i64, i1 } %tmp0, 1
+  %tmp3 = zext i1 %tmp2 to i32
+  store i32 %tmp3, i32* %ps
+  ret i64 %tmp1
+}
+
+attributes #0 = { nounwind }
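
Note (not part of the patch, illustration only): the commit message limits the new
fast-isel path to i32/i64, so for i8/i16 selectAtomicCmpXchg returns false and
fast-isel falls back for that instruction to the existing -O0 lowering (the path
cmpxchg-O0.ll continues to exercise via -fast-isel=0). A minimal IR sketch of such
a rejected case, with a hypothetical function name:

; Illustrative sketch, not from the patch: i8 is not a legal type here, so the
; new fast-isel path rejects this cmpxchg and selection falls back to the
; non-fast-isel -O0 lowering.
define { i8, i1 } @cmpxchg_i8_not_selected_by_fastisel(i8* %p, i8 %cmp, i8 %new) nounwind {
  %pair = cmpxchg i8* %p, i8 %cmp, i8 %new seq_cst seq_cst
  ret { i8, i1 } %pair
}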