mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[ARM] Fix for PR39060
When calculating whether a value can safely overflow for use by an icmp, we weren't checking that the value couldn't wrap around. To do this we need the icmp to be using a constant, as well as the incoming add or sub. bugzilla report: https://bugs.llvm.org/show_bug.cgi?id=39060 Differential Revision: https://reviews.llvm.org/D52463 llvm-svn: 343092
This commit is contained in:
parent
9132d88d17
commit
7284df400a
@ -247,41 +247,114 @@ static bool isSafeOverflow(Instruction *I) {
|
||||
if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
|
||||
return true;
|
||||
|
||||
// We can support a, potentially, overflowing instruction (I) if:
|
||||
// - It is only used by an unsigned icmp.
|
||||
// - The icmp uses a constant.
|
||||
// - The overflowing value (I) is decreasing, i.e. it would underflow - wrapping
|
||||
// around zero to become a larger number than before.
|
||||
// - The underflowing instruction (I) also uses a constant.
|
||||
//
|
||||
// We can then use the two constants to calculate whether the result would
|
||||
// wrap with respect to itself in the original bitwidth. If it doesn't wrap,
|
||||
// just underflows the range, the icmp would give the same result whether the
|
||||
// result has been truncated or not. We calculate this by:
|
||||
// - Zero extending both constants, if needed, to 32-bits.
|
||||
// - Take the absolute value of I's constant, adding this to the icmp const.
|
||||
// - Check that this value is not out of range for small type. If it is, it
|
||||
// means that it has underflowed enough to wrap around the icmp constant.
|
||||
//
|
||||
// For example:
|
||||
//
|
||||
// %sub = sub i8 %a, 2
|
||||
// %cmp = icmp ule i8 %sub, 254
|
||||
//
|
||||
// If %a = 0, %sub = -2 == FE == 254
|
||||
// But if this is evaluated as an i32
|
||||
// %sub = -2 == FF FF FF FE == 4294967294
|
||||
// So the unsigned compares (i8 and i32) would not yield the same result.
|
||||
//
|
||||
// Another way to look at it is:
|
||||
// %a - 2 <= 254
|
||||
// %a + 2 <= 254 + 2
|
||||
// %a <= 256
|
||||
// And we can't represent 256 in the i8 format, so we don't support it.
|
||||
//
|
||||
// Whereas:
|
||||
//
|
||||
// %sub = sub i8 %a, 1
|
||||
// %cmp = icmp ule i8 %sub, 254
|
||||
//
|
||||
// If %a = 0, %sub = -1 == FF == 255
|
||||
// As i32:
|
||||
// %sub = -1 == FF FF FF FF == 4294967295
|
||||
//
|
||||
// In this case, the unsigned compare results would be the same and this
|
||||
// would also be true for ult, uge and ugt:
|
||||
// - (255 < 254) == (0xFFFFFFFF < 254) == false
|
||||
// - (255 <= 254) == (0xFFFFFFFF <= 254) == false
|
||||
// - (255 > 254) == (0xFFFFFFFF > 254) == true
|
||||
// - (255 >= 254) == (0xFFFFFFFF >= 254) == true
|
||||
//
|
||||
// To demonstrate why we can't handle increasing values:
|
||||
//
|
||||
// %add = add i8 %a, 2
|
||||
// %cmp = icmp ult i8 %add, 127
|
||||
//
|
||||
// If %a = 254, %add = 256 == (i8 0)
|
||||
// As i32:
|
||||
// %add = 256
|
||||
//
|
||||
// (0 < 127) != (256 < 127)
|
||||
|
||||
unsigned Opc = I->getOpcode();
|
||||
if (Opc == Instruction::Add || Opc == Instruction::Sub) {
|
||||
// We don't care if the add or sub could wrap if the value is decreasing
|
||||
// and is only being used by an unsigned compare.
|
||||
if (!I->hasOneUse() ||
|
||||
!isa<ICmpInst>(*I->user_begin()) ||
|
||||
!isa<ConstantInt>(I->getOperand(1)))
|
||||
if (Opc != Instruction::Add && Opc != Instruction::Sub)
|
||||
return false;
|
||||
|
||||
if (!I->hasOneUse() ||
|
||||
!isa<ICmpInst>(*I->user_begin()) ||
|
||||
!isa<ConstantInt>(I->getOperand(1)))
|
||||
return false;
|
||||
|
||||
ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1));
|
||||
bool NegImm = OverflowConst->isNegative();
|
||||
bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
|
||||
((Opc == Instruction::Add) && NegImm);
|
||||
if (!IsDecreasing)
|
||||
return false;
|
||||
|
||||
// Don't support an icmp that deals with sign bits.
|
||||
auto *CI = cast<ICmpInst>(*I->user_begin());
|
||||
if (CI->isSigned() || CI->isEquality())
|
||||
return false;
|
||||
|
||||
ConstantInt *ICmpConst = nullptr;
|
||||
if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
|
||||
ICmpConst = Const;
|
||||
else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
|
||||
ICmpConst = Const;
|
||||
else
|
||||
return false;
|
||||
|
||||
// Now check that the result can't wrap on itself.
|
||||
APInt Total = ICmpConst->getValue().getBitWidth() < 32 ?
|
||||
ICmpConst->getValue().zext(32) : ICmpConst->getValue();
|
||||
|
||||
Total += OverflowConst->getValue().getBitWidth() < 32 ?
|
||||
OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs();
|
||||
|
||||
APInt Max = APInt::getAllOnesValue(ARMCodeGenPrepare::TypeSize);
|
||||
|
||||
if (Total.getBitWidth() > Max.getBitWidth()) {
|
||||
if (Total.ugt(Max.zext(Total.getBitWidth())))
|
||||
return false;
|
||||
|
||||
auto *CI = cast<ICmpInst>(*I->user_begin());
|
||||
|
||||
// Don't support an icmp that deals with sign bits, including negative
|
||||
// immediates
|
||||
if (CI->isSigned())
|
||||
} else if (Max.getBitWidth() > Total.getBitWidth()) {
|
||||
if (Total.zext(Max.getBitWidth()).ugt(Max))
|
||||
return false;
|
||||
} else if (Total.ugt(Max))
|
||||
return false;
|
||||
|
||||
if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
|
||||
if (Const->isNegative())
|
||||
return false;
|
||||
|
||||
if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
|
||||
if (Const->isNegative())
|
||||
return false;
|
||||
|
||||
bool NegImm = cast<ConstantInt>(I->getOperand(1))->isNegative();
|
||||
bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
|
||||
((Opc == Instruction::Add) && NegImm);
|
||||
if (!IsDecreasing)
|
||||
return false;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool shouldPromote(Value *V) {
|
||||
@ -459,6 +532,8 @@ void IRPromoter::Mutate(Type *OrigTy,
|
||||
if (!shouldPromote(V) || isPromotedResultSafe(V))
|
||||
continue;
|
||||
|
||||
assert(EnableDSP && "DSP intrinisc insertion not enabled!");
|
||||
|
||||
// Replace unsafe instructions with appropriate intrinsic calls.
|
||||
InsertDSPIntrinsic(cast<Instruction>(V));
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=thumbv8.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s
|
||||
; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s
|
||||
|
||||
; CHECK: overflow_add
|
||||
; CHECK: add
|
||||
@ -47,3 +47,134 @@ define zeroext i16 @overflow_shl(i16 zeroext %a, i16 zeroext %b) {
|
||||
%res = select i1 %cmp, i16 2, i16 5
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: overflow_add_no_consts:
|
||||
; CHECK: add r0, r1
|
||||
; CHECK: uxtb [[EXT:r[0-9]+]], r0
|
||||
; CHECK: cmp [[EXT]], r2
|
||||
; CHECK: movhi r0, #8
|
||||
define i32 @overflow_add_no_consts(i8 zeroext %a, i8 zeroext %b, i8 zeroext %limit) {
|
||||
%add = add i8 %a, %b
|
||||
%cmp = icmp ugt i8 %add, %limit
|
||||
%res = select i1 %cmp, i32 8, i32 16
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: overflow_add_const_limit:
|
||||
; CHECK: add r0, r1
|
||||
; CHECK: uxtb [[EXT:r[0-9]+]], r0
|
||||
; CHECK: cmp [[EXT]], #128
|
||||
; CHECK: movhi r0, #8
|
||||
define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) {
|
||||
%add = add i8 %a, %b
|
||||
%cmp = icmp ugt i8 %add, 128
|
||||
%res = select i1 %cmp, i32 8, i32 16
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: overflow_add_positive_const_limit:
|
||||
; CHECK: adds r0, #1
|
||||
; CHECK: uxtb [[EXT:r[0-9]+]], r0
|
||||
; CHECK: cmp [[EXT]], #128
|
||||
; CHECK: movhi r0, #8
|
||||
define i32 @overflow_add_positive_const_limit(i8 zeroext %a) {
|
||||
%add = add i8 %a, 1
|
||||
%cmp = icmp ugt i8 %add, 128
|
||||
%res = select i1 %cmp, i32 8, i32 16
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: unsafe_add_underflow:
|
||||
; CHECK: subs r0, #2
|
||||
; CHECK: uxtb [[EXT:r[0-9]+]], r0
|
||||
; CHECK: cmp [[EXT]], #255
|
||||
; CHECK: moveq r0, #8
|
||||
define i32 @unsafe_add_underflow(i8 zeroext %a) {
|
||||
%add = add i8 %a, -2
|
||||
%cmp = icmp ugt i8 %add, 254
|
||||
%res = select i1 %cmp, i32 8, i32 16
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: safe_add_underflow:
|
||||
; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #1
|
||||
; CHECK-NOT: uxtb
|
||||
; CHECK: cmp [[MINUS_1]], #254
|
||||
; CHECK: movhi r0, #8
|
||||
define i32 @safe_add_underflow(i8 zeroext %a) {
|
||||
%add = add i8 %a, -1
|
||||
%cmp = icmp ugt i8 %add, 254
|
||||
%res = select i1 %cmp, i32 8, i32 16
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: safe_add_underflow_neg:
|
||||
; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #2
|
||||
; CHECK-NOT: uxtb
|
||||
; CHECK: cmp [[MINUS_1]], #251
|
||||
; CHECK: movlo r0, #8
|
||||
define i32 @safe_add_underflow_neg(i8 zeroext %a) {
|
||||
%add = add i8 %a, -2
|
||||
%cmp = icmp ule i8 %add, -6
|
||||
%res = select i1 %cmp, i32 8, i32 16
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: overflow_sub_negative_const_limit:
|
||||
; CHECK: adds r0, #1
|
||||
; CHECK: uxtb [[EXT:r[0-9]+]], r0
|
||||
; CHECK: cmp [[EXT]], #128
|
||||
; CHECK: movhi r0, #8
|
||||
define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) {
|
||||
%sub = sub i8 %a, -1
|
||||
%cmp = icmp ugt i8 %sub, 128
|
||||
%res = select i1 %cmp, i32 8, i32 16
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: unsafe_sub_underflow:
|
||||
; CHECK: subs r0, #6
|
||||
; CHECK: uxtb [[EXT:r[0-9]+]], r0
|
||||
; CHECK: cmp [[EXT]], #250
|
||||
; CHECK: movhi r0, #8
|
||||
define i32 @unsafe_sub_underflow(i8 zeroext %a) {
|
||||
%sub = sub i8 %a, 6
|
||||
%cmp = icmp ugt i8 %sub, 250
|
||||
%res = select i1 %cmp, i32 8, i32 16
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: safe_sub_underflow:
|
||||
; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #1
|
||||
; CHECK-NOT: uxtb
|
||||
; CHECK: cmp [[MINUS_1]], #255
|
||||
; CHECK: movlo r0, #8
|
||||
define i32 @safe_sub_underflow(i8 zeroext %a) {
|
||||
%sub = sub i8 %a, 1
|
||||
%cmp = icmp ule i8 %sub, 254
|
||||
%res = select i1 %cmp, i32 8, i32 16
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: safe_sub_underflow_neg
|
||||
; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #4
|
||||
; CHECK-NOT: uxtb
|
||||
; CHECK: cmp [[MINUS_1]], #250
|
||||
; CHECK: movhi r0, #8
|
||||
define i32 @safe_sub_underflow_neg(i8 zeroext %a) {
|
||||
%sub = sub i8 %a, 4
|
||||
%cmp = icmp uge i8 %sub, -5
|
||||
%res = select i1 %cmp, i32 8, i32 16
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; CHECK: subs r0, #4
|
||||
; CHECK: uxtb [[EXT:r[0-9]+]], r0
|
||||
; CHECK: cmp [[EXT]], #253
|
||||
; CHECK: movlo r0, #8
|
||||
define i32 @unsafe_sub_underflow_neg(i8 zeroext %a) {
|
||||
%sub = sub i8 %a, 4
|
||||
%cmp = icmp ult i8 %sub, -3
|
||||
%res = select i1 %cmp, i32 8, i32 16
|
||||
ret i32 %res
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=thumbv8.main -mcpu=cortex-m33 -arm-disable-cgp=false -mattr=-use-misched %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
|
||||
; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 -arm-disable-cgp=false -mattr=-use-misched %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
|
||||
; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP
|
||||
; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM
|
||||
|
||||
@ -47,13 +47,22 @@ entry:
|
||||
; CHECK-NODSP: cmp
|
||||
; CHECK-NODSP: cmp
|
||||
|
||||
; CHECK-DSP: sxth [[ARG:r[0-9]+]], r2
|
||||
; CHECK-DSP: subs [[SUB:r[0-9]+]],
|
||||
; CHECK-DSP: uadd16 [[ADD:r[0-9]+]],
|
||||
; CHECK-DSP: sxth.w [[SEXT:r[0-9]+]], [[ADD]]
|
||||
; CHECK-DSP: cmp [[SEXT]], [[ARG]]
|
||||
; CHECK-DSP-NOT: uxt
|
||||
; CHECK-DSP: cmp [[SUB]], r2
|
||||
; CHECK-DSP: sub
|
||||
; CHECK-DSP: sxth
|
||||
; CHECK-DSP: add
|
||||
; CHECK-DSP: uxth
|
||||
; CHECK-DSP: sxth
|
||||
; CHECK-DSP: cmp
|
||||
; CHECK-DSP: cmp
|
||||
|
||||
; CHECK-DSP-IMM: sxth [[ARG:r[0-9]+]], r2
|
||||
; CHECK-DSP-IMM: uadd16 [[ADD:r[0-9]+]],
|
||||
; CHECK-DSP-IMM: sxth.w [[SEXT:r[0-9]+]], [[ADD]]
|
||||
; CHECK-DSP-IMM: cmp [[SEXT]], [[ARG]]
|
||||
; CHECK-DSP-IMM-NOT: uxt
|
||||
; CHECK-DSP-IMM: movs [[ONE:r[0-9]+]], #1
|
||||
; CHECK-DSP-IMM: usub16 [[SUB:r[0-9]+]], r1, [[ONE]]
|
||||
; CHECK-DSP-IMM: cmp [[SUB]], r2
|
||||
define i16 @ugt_slt(i16 *%x, i16 zeroext %y, i16 zeroext %z) {
|
||||
entry:
|
||||
%load0 = load i16, i16* %x, align 1
|
||||
|
33
test/CodeGen/ARM/pr39060.ll
Normal file
33
test/CodeGen/ARM/pr39060.ll
Normal file
@ -0,0 +1,33 @@
|
||||
; RUN: llc -mtriple=armv7a-linux-androideabi %s -o - | FileCheck %s
|
||||
|
||||
@a = local_unnamed_addr global i16 -1, align 2
|
||||
@b = local_unnamed_addr global i16 0, align 2
|
||||
|
||||
; CHECK-LABEL: pr39060:
|
||||
; CHECK: ldrh
|
||||
; CHECK: ldrh
|
||||
; CHECK: sub
|
||||
; CHECK: uxth
|
||||
define void @pr39060() local_unnamed_addr #0 {
|
||||
entry:
|
||||
%0 = load i16, i16* @a, align 2
|
||||
%1 = load i16, i16* @b, align 2
|
||||
%sub = add i16 %1, -1
|
||||
%cmp = icmp eq i16 %0, %sub
|
||||
br i1 %cmp, label %if.else, label %if.then
|
||||
|
||||
if.then:
|
||||
tail call void bitcast (void (...)* @f to void ()*)() #2
|
||||
br label %if.end
|
||||
|
||||
if.else:
|
||||
tail call void bitcast (void (...)* @g to void ()*)() #2
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @f(...) local_unnamed_addr #1
|
||||
|
||||
declare void @g(...) local_unnamed_addr #1
|
Loading…
Reference in New Issue
Block a user