1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[SLP] avoid leaking poison in reduction of safe boolean logic ops

This bug was introduced with D105730 / 25ee55c0baff.

If we are not converting all of the operations of a reduction
into a vector op, we need to preserve the existing select form
of the remaining ops. Otherwise, we potentially leak poison
where the original code did not.

Alive2 agrees that the version that freezes some inputs
and then falls back to scalar is correct:
https://alive2.llvm.org/ce/z/erF4K2
This commit is contained in:
Sanjay Patel 2021-07-15 16:47:51 -04:00
parent 2bbc0bd7c0
commit beede36179
3 changed files with 37 additions and 25 deletions

View File

@ -7826,6 +7826,14 @@ public:
if (V.isLoadCombineReductionCandidate(RdxKind))
break;
// For a poison-safe boolean logic reduction, do not replace select
// instructions with logic ops. All reduced values will be frozen (see
// below) to prevent leaking poison.
if (isa<SelectInst>(ReductionRoot) &&
isBoolLogicOp(cast<Instruction>(ReductionRoot)) &&
NumReducedVals != ReduxWidth)
break;
V.computeMinimumValueSizes();
// Estimate cost.

View File

@ -97,16 +97,16 @@ define float @test_merge_anyof_v4sf(<4 x float> %t) {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i32 0
; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]]
; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer
; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00
; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00
; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[CMP19]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[CMP24]]
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[CMP29]]
; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00
; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP19]]
; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00
; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP24]]
; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00
; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP29]]
; CHECK-NEXT: [[CMP34:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[TMP9]], i1 true, i1 [[CMP34]]
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP34]]
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]]
; CHECK-NEXT: ret float [[RETVAL_0]]
@ -269,16 +269,16 @@ define float @test_separate_anyof_v4sf(<4 x float> %t) {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i32 0
; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]]
; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer
; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00
; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00
; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[CMP18]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[CMP23]]
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[CMP28]]
; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00
; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP18]]
; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00
; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP23]]
; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00
; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP28]]
; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[TMP9]], i1 true, i1 [[CMP33]]
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP33]]
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]]
; CHECK-NEXT: ret float [[RETVAL_0]]
@ -436,16 +436,16 @@ define float @test_merge_anyof_v4si(<4 x i32> %t) {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[T]], i32 0
; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP3]], 255
; CHECK-NEXT: [[CMP14:%.*]] = icmp sgt i32 [[TMP2]], 255
; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[TMP1]], 255
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[CMP11]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[CMP14]]
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[CMP17]]
; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP3]], 255
; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP11]]
; CHECK-NEXT: [[CMP14:%.*]] = icmp sgt i32 [[TMP2]], 255
; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP14]]
; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[TMP1]], 255
; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP17]]
; CHECK-NEXT: [[CMP20:%.*]] = icmp sgt i32 [[TMP0]], 255
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[TMP9]], i1 true, i1 [[CMP20]]
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP20]]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[CONV]]

View File

@ -168,6 +168,10 @@ define i1 @mixed_logical_icmp(<4 x i32> %x) {
ret i1 %s3
}
; TODO: This is better than all-scalar and still safe,
; but we want this to be 2 reductions with glue
; logic...or a wide reduction?
define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3
@ -181,10 +185,10 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
; CHECK-NEXT: [[D3:%.*]] = icmp sgt i32 [[TMP1]], 17
; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP7]], [[D0]]
; CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP8]], [[D1]]
; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP9]], [[D2]]
; CHECK-NEXT: [[S7:%.*]] = select i1 [[TMP10]], i1 [[D3]], i1 false
; CHECK-NEXT: [[S4:%.*]] = select i1 [[TMP7]], i1 [[D0]], i1 false
; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
; CHECK-NEXT: ret i1 [[S7]]
;
%x0 = extractelement <4 x i32> %x, i32 0