From 9abc810a4322579e27526bde7ae2ad43d07c557d Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 2 Jul 2021 10:20:41 -0500 Subject: [PATCH] [OpaquePtr] Add type parameter to emitLoadLinked Differential Revision: https://reviews.llvm.org/D105353 --- include/llvm/CodeGen/TargetLowering.h | 4 +- lib/CodeGen/AtomicExpandPass.cpp | 11 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 17 ++- lib/Target/AArch64/AArch64ISelLowering.h | 2 +- lib/Target/ARM/ARMISelLowering.cpp | 16 ++- lib/Target/ARM/ARMISelLowering.h | 2 +- lib/Target/Hexagon/HexagonISelLowering.cpp | 13 +- lib/Target/Hexagon/HexagonISelLowering.h | 2 +- test/CodeGen/Hexagon/atomic-opaque-basic.ll | 125 ++++++++++++++++++++ 9 files changed, 156 insertions(+), 36 deletions(-) create mode 100644 test/CodeGen/Hexagon/atomic-opaque-basic.ll diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index 47d6ca43a5a..e2474e21052 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -1879,8 +1879,8 @@ public: /// corresponding pointee type. This may entail some non-trivial operations to /// truncate or reconstruct types that will be illegal in the backend. See /// ARMISelLowering for an example implementation. - virtual Value *emitLoadLinked(IRBuilderBase &Builder, Value *Addr, - AtomicOrdering Ord) const { + virtual Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, + Value *Addr, AtomicOrdering Ord) const { llvm_unreachable("Load linked unimplemented on this target"); } diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index 4261940014b..ec996b04521 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -425,8 +425,8 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { // On some architectures, load-linked instructions are atomic for larger // sizes than normal loads. For example, the only 64-bit load guaranteed // to be single-copy atomic by ARM is an ldrexd (A3.5.3). - Value *Val = - TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering()); + Value *Val = TLI->emitLoadLinked(Builder, LI->getType(), + LI->getPointerOperand(), LI->getOrdering()); TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); LI->replaceAllUsesWith(Val); @@ -1101,7 +1101,7 @@ Value *AtomicExpand::insertRMWLLSCLoop( // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(LoopBB); - Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder); Value *NewVal = PerformOp(Builder, Loaded); @@ -1269,7 +1269,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Start the main loop block now that we've taken care of the preliminaries. 
   Builder.SetInsertPoint(StartBB);
   Value *UnreleasedLoad =
-      TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
   Value *UnreleasedLoadExtract =
       extractMaskedValue(Builder, UnreleasedLoad, PMV);
   Value *ShouldStore = Builder.CreateICmpEQ(
@@ -1302,7 +1302,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   Builder.SetInsertPoint(ReleasedLoadBB);
   Value *SecondLoad;
   if (HasReleasedLoadBB) {
-    SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+    SecondLoad =
+        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
     Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
     ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
                                        CI->getCompareOperand(), "should_store");
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6d2f6a32553..dc4565924d3 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17273,16 +17273,15 @@ AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
 }
 
 Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
-                                             Value *Addr,
+                                             Type *ValueTy, Value *Addr,
                                              AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
-  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
   bool IsAcquire = isAcquireOrStronger(Ord);
 
   // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
   // intrinsic must return {i64, i64} and we have to recombine them into a
   // single i128 here.
-  if (ValTy->getPrimitiveSizeInBits() == 128) {
+  if (ValueTy->getPrimitiveSizeInBits() == 128) {
     Intrinsic::ID Int =
         IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
     Function *Ldxr = Intrinsic::getDeclaration(M, Int);
@@ -17292,10 +17291,10 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
 
     Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
     Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
-    Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
-    Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+    Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
+    Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
     return Builder.CreateOr(
-        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
+        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
   }
 
   Type *Tys[] = { Addr->getType() };
@@ -17303,13 +17302,11 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
       IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
   Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
 
-  Type *EltTy = cast<PointerType>(Addr->getType())->getElementType();
-
   const DataLayout &DL = M->getDataLayout();
-  IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(EltTy));
+  IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
   Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
 
-  return Builder.CreateBitCast(Trunc, EltTy);
+  return Builder.CreateBitCast(Trunc, ValueTy);
 }
 
 void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index a2e11afb337..3d74b04cd01 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -646,7 +646,7 @@ public:
     return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
   }
 
-  Value *emitLoadLinked(IRBuilderBase &Builder, Value *Addr,
+  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                         AtomicOrdering Ord) const override;
   Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                               AtomicOrdering Ord) const override;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 653dbdf281e..7269ed91241 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -19895,16 +19895,16 @@ bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
   return !Subtarget->hasMinSize() || Subtarget->isTargetWindows();
 }
 
-Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Value *Addr,
+Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
+                                         Value *Addr,
                                          AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
-  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
   bool IsAcquire = isAcquireOrStronger(Ord);
 
   // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
   // intrinsic must return {i32, i32} and we have to recombine them into a
   // single i64 here.
-  if (ValTy->getPrimitiveSizeInBits() == 64) {
+  if (ValueTy->getPrimitiveSizeInBits() == 64) {
     Intrinsic::ID Int =
         IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
     Function *Ldrex = Intrinsic::getDeclaration(M, Int);
@@ -19916,19 +19916,17 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Value *Addr,
     Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
     if (!Subtarget->isLittle())
       std::swap (Lo, Hi);
-    Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
-    Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+    Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
+    Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
     return Builder.CreateOr(
-        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
+        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
   }
 
   Type *Tys[] = { Addr->getType() };
   Intrinsic::ID Int =
       IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
   Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
-  return Builder.CreateTruncOrBitCast(
-      Builder.CreateCall(Ldrex, Addr),
-      cast<PointerType>(Addr->getType())->getElementType());
+  return Builder.CreateTruncOrBitCast(Builder.CreateCall(Ldrex, Addr), ValueTy);
 }
 
 void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index fb790598bd5..5a6dc047cf3 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -632,7 +632,7 @@ class VectorType;
     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
 
     Instruction *makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const;
-    Value *emitLoadLinked(IRBuilderBase &Builder, Value *Addr,
+    Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                           AtomicOrdering Ord) const override;
     Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                                 AtomicOrdering Ord) const override;
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 280140f0092..06fee76c4ff 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3555,25 +3555,24 @@ bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
 }
 
 Value *HexagonTargetLowering::emitLoadLinked(IRBuilderBase &Builder,
-      Value *Addr,
+      Type *ValueTy, Value *Addr,
       AtomicOrdering Ord) const {
   BasicBlock *BB = Builder.GetInsertBlock();
   Module *M = BB->getParent()->getParent();
-  auto PT = cast<PointerType>(Addr->getType());
-  Type *Ty = PT->getElementType();
-  unsigned SZ = Ty->getPrimitiveSizeInBits();
+  unsigned SZ = ValueTy->getPrimitiveSizeInBits();
   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
                                    : Intrinsic::hexagon_L4_loadd_locked;
   Function *Fn = Intrinsic::getDeclaration(M, IntID);
-  PointerType *NewPtrTy
-      = Builder.getIntNTy(SZ)->getPointerTo(PT->getAddressSpace());
+  auto PtrTy = cast<PointerType>(Addr->getType());
+  PointerType *NewPtrTy =
+      Builder.getIntNTy(SZ)->getPointerTo(PtrTy->getAddressSpace());
   Addr = Builder.CreateBitCast(Addr, NewPtrTy);
   Value *Call = Builder.CreateCall(Fn, Addr, "larx");
-  return Builder.CreateBitCast(Call, Ty);
+  return Builder.CreateBitCast(Call, ValueTy);
 }
 
 /// Perform a store-conditional operation to Addr. Return the status of the
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 2e8d78d9185..7d376f9f86a 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -323,7 +323,7 @@ public:
           EVT NewVT) const override;
 
   // Handling of atomic RMW instructions.
- Value *emitLoadLinked(IRBuilderBase &Builder, Value *Addr, + Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; diff --git a/test/CodeGen/Hexagon/atomic-opaque-basic.ll b/test/CodeGen/Hexagon/atomic-opaque-basic.ll new file mode 100644 index 00000000000..f14ed465caa --- /dev/null +++ b/test/CodeGen/Hexagon/atomic-opaque-basic.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon -force-opaque-pointers < %s | FileCheck %s + +%s.0 = type { i8 } +@g0 = internal global i8 0, align 1 + +define void @f0() #0 { +; CHECK-LABEL: f0: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r29 = add(r29,#-8) +; CHECK-NEXT: r1 = #255 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = add(r29,#7) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r2 = and(r0,#3) +; CHECK-NEXT: r0 = and(r0,#-4) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r2 = asl(r2,#3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = asl(r1,r2) +; CHECK-NEXT: r2 = lsl(#2,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r3 = sub(#-1,r1) +; CHECK-NEXT: } +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: r4 = memw_locked(r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = and(r4,r3) +; CHECK-NEXT: r4 = add(r4,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 |= and(r4,r1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: memw_locked(r0,p0) = r5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) jump:nt .LBB0_1 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: { +; CHECK-NEXT: r29 = add(r29,#8) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = alloca %s.0 + %v1 = getelementptr %s.0, %s.0* %v0, i32 0, i32 0 + atomicrmw add i8* %v1, i8 2 monotonic + ret void +} + +define void @f1() #0 { +; CHECK-LABEL: f1: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: { +; CHECK-NEXT: r2 = ##g0 +; CHECK-NEXT: r0 = #255 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = and(r2,#3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = asl(r1,#3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = insert(r2,#2,#3) +; CHECK-NEXT: r2 = and(r2,#-4) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r3 = lsl(#1,r4) +; CHECK-NEXT: r4 = asl(r0,r4) +; CHECK-NEXT: } +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB1_1: // %cmpxchg.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: r5 = memw_locked(r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r6 = lsr(r5,r1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = !bitsclr(r6,r0) +; CHECK-NEXT: if (p0.new) jumpr:nt r31 +; CHECK-NEXT: } +; CHECK-NEXT: .LBB1_2: // %cmpxchg.trystore +; CHECK-NEXT: // in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: r6 = r3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r6 |= and(r5,~r4) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: memw_locked(r2,p0) = r6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) jump:nt .LBB1_1 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +entry: + %v0 = cmpxchg volatile i8* @g0, i8 0, i8 1 seq_cst seq_cst + ret 
void +} + + +attributes #0 = { "target-cpu"="hexagonv66" } +
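
Note for out-of-tree targets (not part of the diff above): any downstream
override of emitLoadLinked needs the same mechanical update, since with opaque
pointers the loaded type can no longer be recovered from the pointer operand.
AtomicExpandPass now supplies the type explicitly (LI->getType(), ResultTy, or
PMV.WordType, as in the hunks above). A minimal sketch of a new-style override
follows; XYZTargetLowering and Intrinsic::xyz_load_locked are purely
illustrative placeholders, not real LLVM entities.

// Sketch only: "XYZ" is a hypothetical target whose load-locked intrinsic is
// assumed to return a wide integer. The pattern mirrors the AArch64/ARM/
// Hexagon changes in this patch: consume the ValueTy parameter instead of
// querying Addr's pointee type.
Value *XYZTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
                                         Value *Addr,
                                         AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  // Previously: cast<PointerType>(Addr->getType())->getElementType().
  Type *Tys[] = {Addr->getType()};
  Function *Ldxr =
      Intrinsic::getDeclaration(M, Intrinsic::xyz_load_locked, Tys);
  const DataLayout &DL = M->getDataLayout();
  // Truncate the intrinsic's integer result to the access width, then bitcast
  // to the requested value type.
  IntegerType *IntTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
  Value *Loaded = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntTy);
  return Builder.CreateBitCast(Loaded, ValueTy);
}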