LSR: Check more intrinsic pointer operands
llvm-svn: 320424
parent 6c52f16a00
commit b1cb514280
@@ -288,6 +288,32 @@ unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
   return 8;
 }
 
+bool AMDGPUTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
+                                       MemIntrinsicInfo &Info) const {
+  switch (Inst->getIntrinsicID()) {
+  case Intrinsic::amdgcn_atomic_inc:
+  case Intrinsic::amdgcn_atomic_dec: {
+    auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2));
+    auto *Volatile = dyn_cast<ConstantInt>(Inst->getArgOperand(4));
+    if (!Ordering || !Volatile)
+      return false; // Invalid.
+
+    unsigned OrderingVal = Ordering->getZExtValue();
+    if (OrderingVal > static_cast<unsigned>(AtomicOrdering::SequentiallyConsistent))
+      return false;
+
+    Info.PtrVal = Inst->getArgOperand(0);
+    Info.Ordering = static_cast<AtomicOrdering>(OrderingVal);
+    Info.ReadMem = true;
+    Info.WriteMem = true;
+    Info.IsVolatile = !Volatile->isNullValue();
+    return true;
+  }
+  default:
+    return false;
+  }
+}
+
 int AMDGPUTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
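For context, getTgtMemIntrinsic is the generic TargetTransformInfo hook that lets target-independent code discover the pointer operand of a target-specific memory intrinsic. A minimal sketch of how a caller might query it is shown below; the helper function is illustrative only and is not part of this commit.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Illustrative helper (not in this commit): ask the target whether this
// intrinsic accesses memory through one of its operands, and if so return
// the value it uses as an address.
static Value *getTargetIntrinsicPointerOperand(const TargetTransformInfo &TTI,
                                               IntrinsicInst *II) {
  MemIntrinsicInfo IntrInfo;
  if (TTI.getTgtMemIntrinsic(II, IntrInfo))
    return IntrInfo.PtrVal; // May be null if the target did not record one.
  return nullptr;
}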
@@ -132,6 +132,8 @@ public:
 
   unsigned getMaxInterleaveFactor(unsigned VF);
 
+  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
+
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
@@ -777,7 +777,8 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
 
 /// Returns true if the specified instruction is using the specified value as an
 /// address.
-static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
+static bool isAddressUse(const TargetTransformInfo &TTI,
+                         Instruction *Inst, Value *OperandVal) {
   bool isAddress = isa<LoadInst>(Inst);
   if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
     if (SI->getPointerOperand() == OperandVal)
@@ -786,18 +787,24 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
     // Addressing modes can also be folded into prefetches and a variety
     // of intrinsics.
     switch (II->getIntrinsicID()) {
-      default: break;
-      case Intrinsic::memset:
-      case Intrinsic::prefetch:
-        if (II->getArgOperand(0) == OperandVal)
+    case Intrinsic::memset:
+    case Intrinsic::prefetch:
+      if (II->getArgOperand(0) == OperandVal)
+        isAddress = true;
+      break;
+    case Intrinsic::memmove:
+    case Intrinsic::memcpy:
+      if (II->getArgOperand(0) == OperandVal ||
+          II->getArgOperand(1) == OperandVal)
+        isAddress = true;
+      break;
+    default: {
+      MemIntrinsicInfo IntrInfo;
+      if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
+        if (IntrInfo.PtrVal == OperandVal)
           isAddress = true;
-        break;
-      case Intrinsic::memmove:
-      case Intrinsic::memcpy:
-        if (II->getArgOperand(0) == OperandVal ||
-            II->getArgOperand(1) == OperandVal)
-          isAddress = true;
-        break;
+      }
+    }
     }
   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
     if (RMW->getPointerOperand() == OperandVal)
@@ -810,7 +817,8 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
 }
 
 /// Return the type of the memory being accessed.
-static MemAccessTy getAccessType(const Instruction *Inst) {
+static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
+                                 Instruction *Inst) {
   MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
   if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
     AccessTy.MemTy = SI->getOperand(0)->getType();
@@ -821,6 +829,21 @@ static MemAccessTy getAccessType(const Instruction *Inst) {
     AccessTy.AddrSpace = RMW->getPointerAddressSpace();
   } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
     AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
+  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::prefetch:
+      AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
+      break;
+    default: {
+      MemIntrinsicInfo IntrInfo;
+      if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
+        AccessTy.AddrSpace
+          = IntrInfo.PtrVal->getType()->getPointerAddressSpace();
+      }
+
+      break;
+    }
+    }
   }
 
   // All pointers have the same requirements, so canonicalize them to an
@@ -1025,7 +1048,7 @@ private:
                            ScalarEvolution &SE, DominatorTree &DT,
                            SmallPtrSetImpl<const SCEV *> *LoserRegs);
 };
-
+
 /// An operand value in an instruction which is to be replaced with some
 /// equivalent, possibly strength-reduced, replacement.
 struct LSRFixup {
@@ -1149,7 +1172,7 @@ public:
     if (f.Offset < MinOffset)
       MinOffset = f.Offset;
   }
-
+
   bool HasFormulaWithSameRegs(const Formula &F) const;
   float getNotSelectedProbability(const SCEV *Reg) const;
   bool InsertFormula(const Formula &F, const Loop &L);
@@ -2362,7 +2385,7 @@ LSRInstance::OptimizeLoopTermCond() {
             C->getValue().isMinSignedValue())
           goto decline_post_inc;
         // Check for possible scaled-address reuse.
-        MemAccessTy AccessTy = getAccessType(UI->getUser());
+        MemAccessTy AccessTy = getAccessType(TTI, UI->getUser());
         int64_t Scale = C->getSExtValue();
         if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                       /*BaseOffset=*/0,
@@ -3032,13 +3055,13 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
 static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
                              Value *Operand, const TargetTransformInfo &TTI) {
   const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
-  if (!IncConst || !isAddressUse(UserInst, Operand))
+  if (!IncConst || !isAddressUse(TTI, UserInst, Operand))
     return false;
 
   if (IncConst->getAPInt().getMinSignedBits() > 64)
     return false;
 
-  MemAccessTy AccessTy = getAccessType(UserInst);
+  MemAccessTy AccessTy = getAccessType(TTI, UserInst);
   int64_t IncOffset = IncConst->getValue()->getSExtValue();
   if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
                         IncOffset, /*HaseBaseReg=*/false))
@@ -3165,14 +3188,14 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
 
     LSRUse::KindType Kind = LSRUse::Basic;
     MemAccessTy AccessTy;
-    if (isAddressUse(UserInst, U.getOperandValToReplace())) {
+    if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
       Kind = LSRUse::Address;
-      AccessTy = getAccessType(UserInst);
+      AccessTy = getAccessType(TTI, UserInst);
     }
 
     const SCEV *S = IU.getExpr(U);
     PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
-
+
     // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
     // (N - i == 0), and this allows (N - i) to be the expression that we work
     // with rather than just N or i, so we can consider the register
@@ -4304,7 +4327,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
         LUThatHas->pushFixup(Fixup);
         DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
       }
-
+
       // Delete formulae from the new use which are no longer legal.
       bool Any = false;
       for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
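Concretely, in the AMDGPU tests below, the pointer operand of a call such as the following (adapted from the test input, with hypothetical value names) is now recognized as an address use, so LSR can rewrite it in terms of a pointer induction variable (the %lsr.iv phis in the OPT check lines) instead of recomputing the address from the integer induction variable on every iteration:

  %gep = getelementptr inbounds i32, i32 addrspace(3)* %base, i32 %idx
  %val = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 undef, i32 0, i32 0, i1 false)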
@@ -407,6 +407,19 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64 addrspace(
   ret void
 }
 
+; GCN-LABEL: {{^}}nocse_lds_atomic_inc_ret_i32:
+; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
+; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
+; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
+define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 {
+  %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
+  %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
+
+  store i32 %result0, i32 addrspace(1)* %out0
+  store i32 %result1, i32 addrspace(1)* %out1
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind argmemonly }
@@ -84,4 +84,84 @@ bb:
   br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
 }
 
-attributes #0 = { nounwind }
+; OPT-LABEL: @test_local_atomicinc_addressing_loop_uniform_index_max_offset_i32(
+; OPT-NOT: getelementptr
+
+; OPT: .lr.ph:
+; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
+; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
+; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
+; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383
+; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %scevgep4, i32 undef, i32 0, i32 0, i1 false)
+; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @test_local_atomicinc_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
+bb:
+  %tmp = icmp sgt i32 %n, 0
+  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader: ; preds = %bb
+  br label %.lr.ph
+
+._crit_edge.loopexit: ; preds = %.lr.ph
+  br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.loopexit, %bb
+  ret void
+
+.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
+  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
+  %tmp1 = add nuw nsw i32 %indvars.iv, 16383
+  %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
+  %tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %tmp3, i32 undef, i32 0, i32 0, i1 false)
+  %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
+  %tmp7 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %tmp6, i32 undef, i32 0, i32 0, i1 false)
+  %tmp8 = add nsw i32 %tmp7, %tmp4
+  atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
+  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, %n
+  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
+}
+
+; OPT-LABEL: @test_local_atomicdec_addressing_loop_uniform_index_max_offset_i32(
+; OPT-NOT: getelementptr
+
+; OPT: .lr.ph:
+; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
+; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
+; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
+; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383
+; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %scevgep4, i32 undef, i32 0, i32 0, i1 false)
+; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @test_local_atomicdec_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
+bb:
+  %tmp = icmp sgt i32 %n, 0
+  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader: ; preds = %bb
+  br label %.lr.ph
+
+._crit_edge.loopexit: ; preds = %.lr.ph
+  br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.loopexit, %bb
+  ret void
+
+.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
+  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
+  %tmp1 = add nuw nsw i32 %indvars.iv, 16383
+  %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
+  %tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %tmp3, i32 undef, i32 0, i32 0, i1 false)
+  %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
+  %tmp7 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %tmp6, i32 undef, i32 0, i32 0, i1 false)
+  %tmp8 = add nsw i32 %tmp7, %tmp4
+  atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
+  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, %n
+  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
+}
+
+declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #1
+declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind argmemonly }
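Note: the RUN lines for this test file sit above the hunk shown here, so they do not appear in the diff. For an LSR IR test checked with an OPT prefix, the driver line typically has roughly the following shape; the exact options used in the actual file may differ.

; Hypothetical RUN line, for illustration only:
; RUN: opt -S -mtriple=amdgcn-- -loop-reduce %s | FileCheck -check-prefix=OPT %s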