diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 011056c21b1..4de1f2aba41 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -335,6 +335,29 @@ PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
   return BaseT::getUserCost(U, Operands, CostKind);
 }
 
+// Determining the address of a TLS variable results in a function call in
+// certain TLS models.
+static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
+                           SmallPtrSetImpl<const Value *> &Visited) {
+  // No need to traverse again if we already checked this operand.
+  if (!Visited.insert(MemAddr).second)
+    return false;
+  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+  if (!GV) {
+    // Recurse to check for constants that refer to TLS global variables.
+    if (const auto *CV = dyn_cast<Constant>(MemAddr))
+      for (const auto &CO : CV->operands())
+        if (memAddrUsesCTR(CO, TM, Visited))
+          return true;
+    return false;
+  }
+
+  if (!GV->isThreadLocal())
+    return false;
+  TLSModel::Model Model = TM.getTLSModel(GV);
+  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
+}
+
 bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
                              SmallPtrSetImpl<const Value *> &Visited) {
   const PPCTargetMachine &TM = ST->getTargetMachine();
@@ -353,31 +376,6 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
     return false;
   };
 
-  // Determining the address of a TLS variable results in a function call in
-  // certain TLS models.
-  std::function<bool(const Value *)> memAddrUsesCTR =
-      [&memAddrUsesCTR, &TM, &Visited](const Value *MemAddr) -> bool {
-    // No need to traverse again if we already checked this operand.
-    if (!Visited.insert(MemAddr).second)
-      return false;
-    const auto *GV = dyn_cast<GlobalValue>(MemAddr);
-    if (!GV) {
-      // Recurse to check for constants that refer to TLS global variables.
-      if (const auto *CV = dyn_cast<Constant>(MemAddr))
-        for (const auto &CO : CV->operands())
-          if (memAddrUsesCTR(CO))
-            return true;
-
-      return false;
-    }
-
-    if (!GV->isThreadLocal())
-      return false;
-    TLSModel::Model Model = TM.getTLSModel(GV);
-    return Model == TLSModel::GeneralDynamic ||
-           Model == TLSModel::LocalDynamic;
-  };
-
   auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
     if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
       return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
@@ -676,7 +674,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
     }
 
     for (Value *Operand : J->operands())
-      if (memAddrUsesCTR(Operand))
+      if (memAddrUsesCTR(Operand, TM, Visited))
         return true;
   }
 
@@ -736,6 +734,24 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
     }
   }
 
+  // If an exit block has a PHI that accesses a TLS variable as one of the
+  // incoming values from the loop, we cannot produce a CTR loop because the
+  // address for that value will be computed in the loop.
+  SmallVector<BasicBlock *, 4> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  for (auto &BB : ExitBlocks) {
+    for (auto &PHI : BB->phis()) {
+      for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
+           Idx++) {
+        const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
+        const Value *IncomingValue = PHI.getIncomingValue(Idx);
+        if (L->contains(IncomingBB) &&
+            memAddrUsesCTR(IncomingValue, TM, Visited))
+          return false;
+      }
+    }
+  }
+
   LLVMContext &C = L->getHeader()->getContext();
   HWLoopInfo.CountType = TM.isPPC64() ?
     Type::getInt64Ty(C) : Type::getInt32Ty(C);
diff --git a/test/CodeGen/PowerPC/pr48527.ll b/test/CodeGen/PowerPC/pr48527.ll
new file mode 100644
index 00000000000..eaff15ce071
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr48527.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -relocation-model=pic -verify-machineinstrs < %s \
+; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
+%struct.e.0.12.28.44.104.108.112.188 = type { i32 }
+%struct.t.1.13.29.45.105.109.113.189 = type { i64, i64 }
+
+@g = external local_unnamed_addr global %struct.e.0.12.28.44.104.108.112.188, align 4
+@aj = external thread_local local_unnamed_addr global %struct.t.1.13.29.45.105.109.113.189, align 8
+
+define void @_ZNK1q1rEv() local_unnamed_addr #0 align 2 {
+; CHECK-LABEL: _ZNK1q1rEv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 0, 16(1)
+; CHECK-NEXT:    stdu 1, -64(1)
+; CHECK-NEXT:    lwz 30, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:    ld 29, .LC0@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, aj@got@tlsgd@ha
+; CHECK-NEXT:    addi 3, 3, aj@got@tlsgd@l
+; CHECK-NEXT:    bl __tls_get_addr(aj@tlsgd)
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi 4, 3, 8
+; CHECK-NEXT:    .p2align 5
+; CHECK-NEXT:  .LBB0_1: # %monotonic.i
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lwz 5, 0(29)
+; CHECK-NEXT:    andi. 5, 5, 255
+; CHECK-NEXT:    bne 0, .LBB0_4
+; CHECK-NEXT:  # %bb.2: # %for.cond.i
+; CHECK-NEXT:    #
+; CHECK-NEXT:    addi 30, 30, -1
+; CHECK-NEXT:    cmplwi 30, 0
+; CHECK-NEXT:    bne 0, .LBB0_1
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mr 4, 3
+; CHECK-NEXT:  .LBB0_4: # %if.end
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    std 3, 0(4)
+; CHECK-NEXT:    addi 1, 1, 64
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* undef, align 4
+  br label %monotonic.i
+
+for.cond.i:                                       ; preds = %monotonic.i
+  %exitcond.not = icmp eq i32 %inc.i, %0
+  br i1 %exitcond.not, label %if.end, label %monotonic.i
+
+monotonic.i:                                      ; preds = %for.cond.i, %entry
+  %i.018.i = phi i32 [ %inc.i, %for.cond.i ], [ 0, %entry ]
+  %1 = load atomic i32, i32* getelementptr inbounds (%struct.e.0.12.28.44.104.108.112.188, %struct.e.0.12.28.44.104.108.112.188* @g, i64 0, i32 0) monotonic, align 4
+  %conv.i = trunc i32 %1 to i8
+  %tobool.not.i = icmp eq i8 %conv.i, 0
+  %inc.i = add nuw nsw i32 %i.018.i, 1
+  br i1 %tobool.not.i, label %for.cond.i, label %if.end
+
+if.end:                                           ; preds = %monotonic.i, %for.cond.i
+  %.sink = phi i64* [ getelementptr inbounds (%struct.t.1.13.29.45.105.109.113.189, %struct.t.1.13.29.45.105.109.113.189* @aj, i64 0, i32 1), %monotonic.i ], [ getelementptr inbounds (%struct.t.1.13.29.45.105.109.113.189, %struct.t.1.13.29.45.105.109.113.189* @aj, i64 0, i32 0), %for.cond.i ]
+  store i64 1, i64* %.sink, align 8
+  ret void
+}
+
+attributes #0 = { nounwind }