mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[InstCombine] Fix miscompile on GEP+load to icmp fold (PR45210)
As noted in PR45210: https://bugs.llvm.org/show_bug.cgi?id=45210 ...the bug is triggered as Eli say when sext(idx) * ElementSize overflows. ``` // assume that GV is an array of 4-byte elements GEP = gep GV, 0, Idx // this is accessing Idx * 4 L = load GEP ICI = icmp eq L, value => ICI = icmp eq Idx, NewIdx ``` The foldCmpLoadFromIndexedGlobal function simplifies GEP+load operation to icmp. And there is a problem because Idx * ElementSize can overflow. Let's assume that the wanted value is at offset 0. Then, there are actually four possible values for Idx to match offset 0: 0x00..00, 0x40..00, 0x80..00, 0xC0..00. We should return true for all these values, but currently, the new icmp only returns true for 0x00..00. This problem can be solved by masking off (trailing zeros of ElementSize) bits from Idx. ``` ... => Idx' = and Idx, 0x3F..FF ICI = icmp eq Idx', NewIdx ``` Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D99481
This commit is contained in:
parent
3f31f63125
commit
73cd3d2fbf
@ -279,9 +279,28 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
|
||||
Idx = Builder.CreateTrunc(Idx, IntPtrTy);
|
||||
}
|
||||
|
||||
// If inbounds keyword is not present, Idx * ElementSize can overflow.
|
||||
// Let's assume that ElementSize is 2 and the wanted value is at offset 0.
|
||||
// Then, there are two possible values for Idx to match offset 0:
|
||||
// 0x00..00, 0x80..00.
|
||||
// Emitting 'icmp eq Idx, 0' isn't correct in this case because the
|
||||
// comparison is false if Idx was 0x80..00.
|
||||
// We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
|
||||
unsigned ElementSize =
|
||||
DL.getTypeAllocSize(Init->getType()->getArrayElementType());
|
||||
auto MaskIdx = [&](Value* Idx){
|
||||
if (!GEP->isInBounds() && countTrailingZeros(ElementSize) != 0) {
|
||||
Value *Mask = ConstantInt::get(Idx->getType(), -1);
|
||||
Mask = Builder.CreateLShr(Mask, countTrailingZeros(ElementSize));
|
||||
Idx = Builder.CreateAnd(Idx, Mask);
|
||||
}
|
||||
return Idx;
|
||||
};
|
||||
|
||||
// If the comparison is only true for one or two elements, emit direct
|
||||
// comparisons.
|
||||
if (SecondTrueElement != Overdefined) {
|
||||
Idx = MaskIdx(Idx);
|
||||
// None true -> false.
|
||||
if (FirstTrueElement == Undefined)
|
||||
return replaceInstUsesWith(ICI, Builder.getFalse());
|
||||
@ -302,6 +321,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
|
||||
// If the comparison is only false for one or two elements, emit direct
|
||||
// comparisons.
|
||||
if (SecondFalseElement != Overdefined) {
|
||||
Idx = MaskIdx(Idx);
|
||||
// None false -> true.
|
||||
if (FirstFalseElement == Undefined)
|
||||
return replaceInstUsesWith(ICI, Builder.getTrue());
|
||||
@ -323,6 +343,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
|
||||
// where it is true, emit the range check.
|
||||
if (TrueRangeEnd != Overdefined) {
|
||||
assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
|
||||
Idx = MaskIdx(Idx);
|
||||
|
||||
// Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
|
||||
if (FirstTrueElement) {
|
||||
@ -338,6 +359,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
|
||||
// False range check.
|
||||
if (FalseRangeEnd != Overdefined) {
|
||||
assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
|
||||
Idx = MaskIdx(Idx);
|
||||
// Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
|
||||
if (FirstFalseElement) {
|
||||
Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
|
||||
@ -364,6 +386,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
|
||||
Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
|
||||
|
||||
if (Ty) {
|
||||
Idx = MaskIdx(Idx);
|
||||
Value *V = Builder.CreateIntCast(Idx, Ty, false);
|
||||
V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
|
||||
V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
|
||||
|
@ -2,22 +2,22 @@
|
||||
; RUN: opt -instcombine -S -data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck %s
|
||||
|
||||
@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
|
||||
i16 73, i16 82, i16 69, i16 68, i16 0]
|
||||
i16 73, i16 82, i16 69, i16 68, i16 0]
|
||||
|
||||
@G16_as1 = internal addrspace(1) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
|
||||
i16 73, i16 82, i16 69, i16 68, i16 0]
|
||||
i16 73, i16 82, i16 69, i16 68, i16 0]
|
||||
|
||||
@GD = internal constant [6 x double]
|
||||
[double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0]
|
||||
[double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0]
|
||||
|
||||
%Foo = type { i32, i32, i32, i32 }
|
||||
|
||||
@GS = internal constant %Foo { i32 1, i32 4, i32 9, i32 14 }
|
||||
|
||||
@GStructArr = internal constant [4 x %Foo] [ %Foo { i32 1, i32 4, i32 9, i32 14 },
|
||||
%Foo { i32 5, i32 4, i32 6, i32 11 },
|
||||
%Foo { i32 6, i32 5, i32 9, i32 20 },
|
||||
%Foo { i32 12, i32 3, i32 9, i32 8 } ]
|
||||
%Foo { i32 5, i32 4, i32 6, i32 11 },
|
||||
%Foo { i32 6, i32 5, i32 9, i32 20 },
|
||||
%Foo { i32 12, i32 3, i32 9, i32 8 } ]
|
||||
|
||||
|
||||
define i1 @test1(i32 %X) {
|
||||
@ -33,7 +33,8 @@ define i1 @test1(i32 %X) {
|
||||
|
||||
define i1 @test1_noinbounds(i32 %X) {
|
||||
; CHECK-LABEL: @test1_noinbounds(
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X:%.*]], 9
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 2147483647
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%P = getelementptr [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
|
||||
@ -44,8 +45,8 @@ define i1 @test1_noinbounds(i32 %X) {
|
||||
|
||||
define i1 @test1_noinbounds_i64(i64 %X) {
|
||||
; CHECK-LABEL: @test1_noinbounds_i64(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[TMP1]], 9
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%P = getelementptr [10 x i16], [10 x i16]* @G16, i64 0, i64 %X
|
||||
@ -56,8 +57,8 @@ define i1 @test1_noinbounds_i64(i64 %X) {
|
||||
|
||||
define i1 @test1_noinbounds_as1(i32 %x) {
|
||||
; CHECK-LABEL: @test1_noinbounds_as1(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[TMP1]], 9
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 32767
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%p = getelementptr [10 x i16], [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x
|
||||
@ -260,7 +261,8 @@ define i1 @test10_struct_arr(i32 %x) {
|
||||
|
||||
define i1 @test10_struct_arr_noinbounds(i32 %x) {
|
||||
; CHECK-LABEL: @test10_struct_arr_noinbounds(
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 268435455
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
|
||||
@ -294,7 +296,9 @@ define i1 @test10_struct_arr_i64(i64 %x) {
|
||||
|
||||
define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
|
||||
; CHECK-LABEL: @test10_struct_arr_noinbounds_i16(
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i16 [[X:%.*]], 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 268435455
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP2]], 1
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
|
||||
@ -305,8 +309,8 @@ define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
|
||||
|
||||
define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
|
||||
; CHECK-LABEL: @test10_struct_arr_noinbounds_i64(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 268435455
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP1]], 1
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2
|
||||
|
Loading…
Reference in New Issue
Block a user