1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[VectorCombine] Freeze index unless it is known to be non-poison.

If the index itself is already poison, the poison propagates through the
instructions that clamp the index to a valid range. The transform
therefore still introduces a load of poison, as flagged by Alive2 and
pointed out at 575e2aff5574.

This patch updates the code to freeze the index, unless it is proven to
not be poison.

Reviewed By: nlopes

Differential Revision: https://reviews.llvm.org/D103378
This commit is contained in:
Florian Hahn 2021-06-01 10:20:42 +01:00
parent 9fec15e2c5
commit 00bbac35a4
2 changed files with 24 additions and 14 deletions

View File

@ -911,8 +911,13 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
for (User *U : LI->users()) {
auto *EI = cast<ExtractElementInst>(U);
Builder.SetInsertPoint(EI);
Value *GEP = Builder.CreateInBoundsGEP(
FixedVT, Ptr, {Builder.getInt32(0), EI->getOperand(1)});
Value *Idx = EI->getOperand(1);
if (!isGuaranteedNotToBePoison(Idx, &AC, LI, &DT))
Idx = Builder.CreateFreeze(Idx);
Value *GEP =
Builder.CreateInBoundsGEP(FixedVT, Ptr, {Builder.getInt32(0), Idx});
auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
FixedVT->getElementType(), GEP, EI->getName() + ".scalar"));

View File

@ -147,8 +147,9 @@ define i32 @load_extract_idx_var_i64_known_valid_by_and(<4 x i32>* %x, i64 %idx)
; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 3
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]]
; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP0]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[IDX_CLAMPED]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[TMP0]]
; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 4
; CHECK-NEXT: ret i32 [[R]]
;
entry:
@ -192,8 +193,9 @@ define i32 @load_extract_idx_var_i64_known_valid_by_urem(<4 x i32>* %x, i64 %idx
; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 4
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]]
; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP0]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[IDX_CLAMPED]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[TMP0]]
; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 4
; CHECK-NEXT: ret i32 [[R]]
;
entry:
@ -569,10 +571,12 @@ define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_
; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and(
; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0:%.*]], 15
; CHECK-NEXT: [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1:%.*]], 15
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X:%.*]], i32 0, i64 [[IDX_0_CLAMPED]]
; CHECK-NEXT: [[E_0:%.*]] = load i32, i32* [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X]], i32 0, i64 [[IDX_1_CLAMPED]]
; CHECK-NEXT: [[E_1:%.*]] = load i32, i32* [[TMP2]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[IDX_0_CLAMPED]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X:%.*]], i32 0, i64 [[TMP1]]
; CHECK-NEXT: [[E_0:%.*]] = load i32, i32* [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[IDX_1_CLAMPED]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X]], i32 0, i64 [[TMP3]]
; CHECK-NEXT: [[E_1:%.*]] = load i32, i32* [[TMP4]], align 4
; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
; CHECK-NEXT: ret i32 [[RES]]
;
@ -590,10 +594,11 @@ define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_
; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and_some_noundef(
; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0:%.*]], 15
; CHECK-NEXT: [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1:%.*]], 15
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X:%.*]], i32 0, i64 [[IDX_0_CLAMPED]]
; CHECK-NEXT: [[E_0:%.*]] = load i32, i32* [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X]], i32 0, i64 [[IDX_1_CLAMPED]]
; CHECK-NEXT: [[E_1:%.*]] = load i32, i32* [[TMP2]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[IDX_0_CLAMPED]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X:%.*]], i32 0, i64 [[TMP1]]
; CHECK-NEXT: [[E_0:%.*]] = load i32, i32* [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X]], i32 0, i64 [[IDX_1_CLAMPED]]
; CHECK-NEXT: [[E_1:%.*]] = load i32, i32* [[TMP3]], align 4
; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
; CHECK-NEXT: ret i32 [[RES]]
;