1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-22 12:33:33 +02:00

[ARM] Allow signed icmps in ARMCodeGenPrepare

Originally committed in r339755 which was reverted in r339806 due to
an asan issue. The issue was caused by my assumption that the operands
of a CallInst map one-to-one onto the FunctionType's params. CallInsts
are now handled by iterating over their ArgOperands instead of their
Operands.
    
Original Message:
  Treat signed icmps as 'sinks', allowing them to be in the use-def
  tree, enabling more promotions to be performed. As a sink, any
  promoted incoming values need to be truncated before being used by
  the signed icmp.
    
  Differential Revision: https://reviews.llvm.org/D50067

llvm-svn: 339858
This commit is contained in:
Sam Parker 2018-08-16 10:05:39 +00:00
parent e1b073ff1a
commit ff9d75eee7
3 changed files with 145 additions and 31 deletions

View File

@ -181,6 +181,8 @@ static bool isSink(Value *V) {
return UsesNarrowValue(Return->getReturnValue());
if (auto *Trunc = dyn_cast<TruncInst>(V))
return UsesNarrowValue(Trunc->getOperand(0));
if (auto *ICmp = dyn_cast<ICmpInst>(V))
return ICmp->isSigned();
return isa<CallInst>(V);
}
@ -294,6 +296,11 @@ void IRPromoter::Mutate(Type *OrigTy,
LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
<< ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
// Cache original types.
DenseMap<Value*, Type*> TruncTysMap;
for (auto *V : Visited)
TruncTysMap[V] = V->getType();
auto ReplaceAllUsersOfWith = [&](Value *From, Value *To) {
SmallVector<Instruction*, 4> Users;
Instruction *InstTo = dyn_cast<Instruction>(To);
@ -337,6 +344,7 @@ void IRPromoter::Mutate(Type *OrigTy,
ReplaceAllUsersOfWith(I, Call);
InstsToRemove.push_back(I);
NewInsts.insert(Call);
TruncTysMap[Call] = OrigTy;
};
auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
@ -351,6 +359,7 @@ void IRPromoter::Mutate(Type *OrigTy,
ZExt->moveAfter(InsertPt);
ReplaceAllUsersOfWith(V, ZExt);
NewInsts.insert(ZExt);
TruncTysMap[ZExt] = TruncTysMap[V];
};
// First, insert extending instructions between the leaves and their users.
@ -409,42 +418,48 @@ void IRPromoter::Mutate(Type *OrigTy,
InsertDSPIntrinsic(cast<Instruction>(V));
}
// Helper: build a trunc of V back to the type recorded for it in TruncTysMap.
// Returns the new trunc instruction, or nullptr when no trunc is created.
// The lambda does not rewire any uses itself; the visible callers move the
// returned trunc in front of the user and replace the operand themselves.
auto InsertTrunc = [&](Value *V) -> Instruction* {
// Only integer-typed instructions are candidates.
if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
return nullptr;
// Skip values that are neither in Promoted nor in NewInsts, and values
// for which no type was recorded in TruncTysMap.
if ((!Promoted.count(V) && !NewInsts.count(V)) || !TruncTysMap.count(V))
return nullptr;
Type *TruncTy = TruncTysMap[V];
// The recorded type already equals ExtTy, so there is nothing to narrow.
if (TruncTy == ExtTy)
return nullptr;
LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy << " Trunc for "
<< *V << "\n");
// The trunc is created at V's own position; callers are expected to
// reposition it next to the instruction that consumes it.
Builder.SetInsertPoint(cast<Instruction>(V));
auto *Trunc = cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
// Track the trunc alongside the other instructions created by this mutation.
NewInsts.insert(Trunc);
return Trunc;
};
LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the roots:\n");
// Fix up any stores or returns that use the results of the promoted
// chain.
for (auto I : Roots) {
LLVM_DEBUG(dbgs() << " - " << *I << "\n");
Type *TruncTy = OrigTy;
if (auto *Store = dyn_cast<StoreInst>(I)) {
auto *PtrTy = cast<PointerType>(Store->getPointerOperandType());
TruncTy = PtrTy->getElementType();
} else if (isa<ReturnInst>(I)) {
Function *F = I->getParent()->getParent();
TruncTy = F->getFunctionType()->getReturnType();
// Handle calls separately as we need to iterate over arg operands.
if (auto *Call = dyn_cast<CallInst>(I)) {
for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
Value *Arg = Call->getArgOperand(i);
if (Instruction *Trunc = InsertTrunc(Arg)) {
Trunc->moveBefore(Call);
Call->setArgOperand(i, Trunc);
}
}
continue;
}
// Now handle the others.
for (unsigned i = 0; i < I->getNumOperands(); ++i) {
Value *V = I->getOperand(i);
if (!isa<IntegerType>(V->getType()))
continue;
if (Promoted.count(V) || NewInsts.count(V)) {
if (auto *Op = dyn_cast<Instruction>(V)) {
if (auto *Call = dyn_cast<CallInst>(I))
TruncTy = Call->getFunctionType()->getParamType(i);
if (TruncTy == ExtTy)
continue;
LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy
<< " Trunc for " << *Op << "\n");
Builder.SetInsertPoint(Op);
auto *Trunc = cast<Instruction>(Builder.CreateTrunc(Op, TruncTy));
Trunc->moveBefore(I);
I->setOperand(i, Trunc);
NewInsts.insert(Trunc);
}
if (Instruction *Trunc = InsertTrunc(I->getOperand(i))) {
Trunc->moveBefore(I);
I->setOperand(i, Trunc);
}
}
}
@ -458,8 +473,8 @@ void IRPromoter::Mutate(Type *OrigTy,
bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
LLVM_DEBUG(dbgs() << "ARM CGP: Is " << *V << " supported?\n");
if (auto *ICmp = dyn_cast<ICmpInst>(V))
return ICmp->isEquality() || !ICmp->isSigned();
if (isa<ICmpInst>(V))
return true;
// Memory instructions
if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))

View File

@ -269,7 +269,6 @@ entry:
; CHECK-COMMON-LABEL: icmp_i7
; CHECK-COMMON: ldrb
; CHECK-COMMON: and
; CHECK-COMMON: cmp
define i32 @icmp_i7(i7* %arg0, i7 zeroext %arg1, i32 %a, i32 %b) {
entry:

View File

@ -0,0 +1,100 @@
; RUN: llc -mtriple=thumbv8.main -mcpu=cortex-m33 -arm-disable-cgp=false -mattr=-use-misched %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP
; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM
; CHECK-COMMON-LABEL: eq_sgt
; CHECK-NODSP: add
; CHECK-NODSP: uxtb
; CHECK-NODSP: sxtb
; CHECK-NODSP: cmp
; CHECK-NODSP: sub
; CHECK-NODSP: sxtb
; CHECK-NODSP: cmp
; CHECK-DSP: add
; CHECK-DSP: uxtb
; CHECK-DSP: cmp
; CHECK-DSP: sxtb
; CHECK-DSP: sub
; CHECK-DSP: sxtb
; CHECK-DSP: cmp
; CHECK-DSP-IMM: uadd8 [[ADD:r[0-9]+]],
; CHECK-DSP-IMM: cmp [[ADD]],
; CHECK-DSP-IMM: sxtb [[SEXT0:r[0-9]+]], [[ADD]]
; CHECK-DSP-IMM: usub8 [[SUB:r[0-9]+]],
; CHECK-DSP-IMM: sxtb [[SEXT1:r[0-9]+]], [[SUB]]
; CHECK-DSP-IMM: cmp [[SEXT1]], [[SEXT0]]
; Test: the same i8 add result feeds both an equality compare and a signed
; compare. The expectations above look for sxtb on the values reaching the
; signed compare.
define i8 @eq_sgt(i8* %x, i8 *%y, i8 zeroext %z) {
entry:
%load0 = load i8, i8* %x, align 1
%load1 = load i8, i8* %y, align 1
; Two narrow arithmetic results: %add is used by both compares below,
; %sub by the signed compare and the final select.
%add = add i8 %load0, %z
%sub = sub i8 %load1, 1
; Equality compare of %add against a constant.
%cmp = icmp eq i8 %add, 200
; Signed compare between the two arithmetic results.
%cmp1 = icmp sgt i8 %sub, %add
%res0 = select i1 %cmp, i8 35, i8 47
%res1 = select i1 %cmp1, i8 %res0, i8 %sub
ret i8 %res1
}
; CHECK-COMMON-LABEL: ugt_slt
; CHECK-NODSP: sub
; CHECK-NODSP: sxth
; CHECK-NODSP: uxth
; CHECK-NODSP: add
; CHECK-NODSP: sxth
; CHECK-NODSP: cmp
; CHECK-NODSP: cmp
; CHECK-DSP: sxth [[ARG:r[0-9]+]], r2
; CHECK-DSP: subs [[SUB:r[0-9]+]],
; CHECK-DSP: uadd16 [[ADD:r[0-9]+]],
; CHECK-DSP: sxth.w [[SEXT:r[0-9]+]], [[ADD]]
; CHECK-DSP: cmp [[SEXT]], [[ARG]]
; CHECK-DSP-NOT: uxt
; CHECK-DSP: cmp [[SUB]], r2
; Test: i16 variant that pairs a signed compare (slt) with an unsigned
; compare (ugt), both against the zeroext argument %z.
define i16 @ugt_slt(i16 *%x, i16 zeroext %y, i16 zeroext %z) {
entry:
%load0 = load i16, i16* %x, align 1
; %add feeds the signed compare; %sub feeds the unsigned compare.
%add = add i16 %load0, %z
%sub = sub i16 %y, 1
; Signed less-than on the add result.
%cmp = icmp slt i16 %add, %z
; Unsigned greater-than on the sub result.
%cmp1 = icmp ugt i16 %sub, %z
%res0 = select i1 %cmp, i16 35, i16 -1
%res1 = select i1 %cmp1, i16 %res0, i16 0
ret i16 %res1
}
; CHECK-COMMON-LABEL: urem_trunc_icmps
; CHECK-COMMON-NOT: uxt
; CHECK-COMMON: sxtb [[SEXT:r[0-9]+]],
; CHECK-COMMON: cmp [[SEXT]], #7
; Test: an i16 urem result is truncated to i8 and carried around a loop
; through a phi whose value is checked with a signed compare against 7.
; The expectations above require an sxtb feeding that cmp and no uxt
; before it.
define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) {
entry:
%ptr = load i16*, i16** %in, align 4
%ld = load i16, i16* %ptr, align 2
; Skip the urem entirely when the loaded value is zero.
%cmp.i = icmp eq i16 %ld, 0
br i1 %cmp.i, label %exit, label %cond.false.i
cond.false.i:
; Compute 5 urem %ld in i16, then narrow the result to i8.
%rem = urem i16 5, %ld
%extract.t = trunc i16 %rem to i8
br label %body
body:
; Loop-carried i8 value: the truncated urem on entry, %add on the back edge.
%cond.in.i.off0 = phi i8 [ %extract.t, %cond.false.i ], [ %add, %for.inc ]
; Signed compare on the i8 phi; its result is widened and stored to %g.
%cmp = icmp sgt i8 %cond.in.i.off0, 7
%conv5 = zext i1 %cmp to i32
store i32 %conv5, i32* %g, align 4
; Keep looping while the value loaded from %k is zero.
%.pr = load i32, i32* %k, align 4
%tobool13150 = icmp eq i32 %.pr, 0
br i1 %tobool13150, label %for.inc, label %exit
for.inc:
; Increment the loop-carried value (marked nuw) and branch back to %body.
%add = add nuw i8 %cond.in.i.off0, 1
br label %body
exit:
ret void
}