1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 12:43:36 +01:00

[InstCombine] Fix miscompile on GEP+load to icmp fold (PR45210)

As noted in PR45210: https://bugs.llvm.org/show_bug.cgi?id=45210
...the bug is triggered as Eli say when sext(idx) * ElementSize overflows.

```
   // assume that GV is an array of 4-byte elements
   GEP = gep GV, 0, Idx // this is accessing Idx * 4
   L = load GEP
   ICI = icmp eq L, value
 =>
   ICI = icmp eq Idx, NewIdx
```

The foldCmpLoadFromIndexedGlobal function simplifies GEP+load operation to icmp.
And there is a problem because Idx * ElementSize can overflow.

Let's assume that the wanted value is at offset 0.
Then, there are actually four possible values for Idx to match offset 0: 0x00..00, 0x40..00, 0x80..00, 0xC0..00.
We should return true for all these values, but currently, the new icmp only returns true for 0x00..00.

This problem can be solved by masking off (trailing zeros of ElementSize) bits from Idx.

```
   ...
 =>
   Idx' = and Idx, 0x3F..FF
   ICI = icmp eq Idx', NewIdx
```

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D99481
This commit is contained in:
hyeongyukim 2021-06-17 19:28:24 +09:00 committed by Juneyoung Lee
parent 3f31f63125
commit 73cd3d2fbf
2 changed files with 42 additions and 15 deletions

View File

@ -279,9 +279,28 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
Idx = Builder.CreateTrunc(Idx, IntPtrTy);
}
// If inbounds keyword is not present, Idx * ElementSize can overflow.
// Let's assume that ElementSize is 2 and the wanted value is at offset 0.
// Then, there are two possible values for Idx to match offset 0:
// 0x00..00, 0x80..00.
// Emitting 'icmp eq Idx, 0' isn't correct in this case because the
// comparison is false if Idx was 0x80..00.
// We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
unsigned ElementSize =
DL.getTypeAllocSize(Init->getType()->getArrayElementType());
auto MaskIdx = [&](Value* Idx){
if (!GEP->isInBounds() && countTrailingZeros(ElementSize) != 0) {
Value *Mask = ConstantInt::get(Idx->getType(), -1);
Mask = Builder.CreateLShr(Mask, countTrailingZeros(ElementSize));
Idx = Builder.CreateAnd(Idx, Mask);
}
return Idx;
};
// If the comparison is only true for one or two elements, emit direct
// comparisons.
if (SecondTrueElement != Overdefined) {
Idx = MaskIdx(Idx);
// None true -> false.
if (FirstTrueElement == Undefined)
return replaceInstUsesWith(ICI, Builder.getFalse());
@ -302,6 +321,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
// If the comparison is only false for one or two elements, emit direct
// comparisons.
if (SecondFalseElement != Overdefined) {
Idx = MaskIdx(Idx);
// None false -> true.
if (FirstFalseElement == Undefined)
return replaceInstUsesWith(ICI, Builder.getTrue());
@ -323,6 +343,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
// where it is true, emit the range check.
if (TrueRangeEnd != Overdefined) {
assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
Idx = MaskIdx(Idx);
// Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
if (FirstTrueElement) {
@ -338,6 +359,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
// False range check.
if (FalseRangeEnd != Overdefined) {
assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
Idx = MaskIdx(Idx);
// Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
if (FirstFalseElement) {
Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
@ -364,6 +386,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
if (Ty) {
Idx = MaskIdx(Idx);
Value *V = Builder.CreateIntCast(Idx, Ty, false);
V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);

View File

@ -33,7 +33,8 @@ define i1 @test1(i32 %X) {
define i1 @test1_noinbounds(i32 %X) {
; CHECK-LABEL: @test1_noinbounds(
; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X:%.*]], 9
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 2147483647
; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9
; CHECK-NEXT: ret i1 [[R]]
;
%P = getelementptr [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
@ -44,8 +45,8 @@ define i1 @test1_noinbounds(i32 %X) {
define i1 @test1_noinbounds_i64(i64 %X) {
; CHECK-LABEL: @test1_noinbounds_i64(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647
; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[TMP1]], 9
; CHECK-NEXT: ret i1 [[R]]
;
%P = getelementptr [10 x i16], [10 x i16]* @G16, i64 0, i64 %X
@ -56,8 +57,8 @@ define i1 @test1_noinbounds_i64(i64 %X) {
define i1 @test1_noinbounds_as1(i32 %x) {
; CHECK-LABEL: @test1_noinbounds_as1(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[TMP1]], 9
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 32767
; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9
; CHECK-NEXT: ret i1 [[R]]
;
%p = getelementptr [10 x i16], [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x
@ -260,7 +261,8 @@ define i1 @test10_struct_arr(i32 %x) {
define i1 @test10_struct_arr_noinbounds(i32 %x) {
; CHECK-LABEL: @test10_struct_arr_noinbounds(
; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 1
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 268435455
; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1
; CHECK-NEXT: ret i1 [[R]]
;
%p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
@ -294,7 +296,9 @@ define i1 @test10_struct_arr_i64(i64 %x) {
define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
; CHECK-LABEL: @test10_struct_arr_noinbounds_i16(
; CHECK-NEXT: [[R:%.*]] = icmp ne i16 [[X:%.*]], 1
; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 268435455
; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP2]], 1
; CHECK-NEXT: ret i1 [[R]]
;
%p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
@ -305,8 +309,8 @@ define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
; CHECK-LABEL: @test10_struct_arr_noinbounds_i64(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 268435455
; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP1]], 1
; CHECK-NEXT: ret i1 [[R]]
;
%p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2