mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[SLP] avoid leaking poison in reduction of safe boolean logic ops
This bug was introduced with D105730 / 25ee55c0baff. If we are not converting all of the operations of a reduction into a vector op, we need to preserve the existing select form of the remaining ops. Otherwise, we potentially leak poison where the original code did not. Alive2 agrees that the version that freezes some inputs and then falls back to scalar is correct: https://alive2.llvm.org/ce/z/erF4K2
This commit is contained in:
parent
2bbc0bd7c0
commit
beede36179
@ -7826,6 +7826,14 @@ public:
|
||||
if (V.isLoadCombineReductionCandidate(RdxKind))
|
||||
break;
|
||||
|
||||
// For a poison-safe boolean logic reduction, do not replace select
|
||||
// instructions with logic ops. All reduced values will be frozen (see
|
||||
// below) to prevent leaking poison.
|
||||
if (isa<SelectInst>(ReductionRoot) &&
|
||||
isBoolLogicOp(cast<Instruction>(ReductionRoot)) &&
|
||||
NumReducedVals != ReduxWidth)
|
||||
break;
|
||||
|
||||
V.computeMinimumValueSizes();
|
||||
|
||||
// Estimate cost.
|
||||
|
@ -97,16 +97,16 @@ define float @test_merge_anyof_v4sf(<4 x float> %t) {
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i32 0
|
||||
; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer
|
||||
; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00
|
||||
; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00
|
||||
; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[CMP19]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[CMP24]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[CMP29]]
|
||||
; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00
|
||||
; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP19]]
|
||||
; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00
|
||||
; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP24]]
|
||||
; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00
|
||||
; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP29]]
|
||||
; CHECK-NEXT: [[CMP34:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00
|
||||
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[TMP9]], i1 true, i1 [[CMP34]]
|
||||
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP34]]
|
||||
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]]
|
||||
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]]
|
||||
; CHECK-NEXT: ret float [[RETVAL_0]]
|
||||
@ -269,16 +269,16 @@ define float @test_separate_anyof_v4sf(<4 x float> %t) {
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i32 0
|
||||
; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer
|
||||
; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00
|
||||
; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00
|
||||
; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[CMP18]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[CMP23]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[CMP28]]
|
||||
; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00
|
||||
; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP18]]
|
||||
; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00
|
||||
; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP23]]
|
||||
; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00
|
||||
; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP28]]
|
||||
; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00
|
||||
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[TMP9]], i1 true, i1 [[CMP33]]
|
||||
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP33]]
|
||||
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]]
|
||||
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]]
|
||||
; CHECK-NEXT: ret float [[RETVAL_0]]
|
||||
@ -436,16 +436,16 @@ define float @test_merge_anyof_v4si(<4 x i32> %t) {
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[T]], i32 0
|
||||
; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1>
|
||||
; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP3]], 255
|
||||
; CHECK-NEXT: [[CMP14:%.*]] = icmp sgt i32 [[TMP2]], 255
|
||||
; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[TMP1]], 255
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[CMP11]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[CMP14]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[CMP17]]
|
||||
; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP3]], 255
|
||||
; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP11]]
|
||||
; CHECK-NEXT: [[CMP14:%.*]] = icmp sgt i32 [[TMP2]], 255
|
||||
; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP14]]
|
||||
; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[TMP1]], 255
|
||||
; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP17]]
|
||||
; CHECK-NEXT: [[CMP20:%.*]] = icmp sgt i32 [[TMP0]], 255
|
||||
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[TMP9]], i1 true, i1 [[CMP20]]
|
||||
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP20]]
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]]
|
||||
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float
|
||||
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[CONV]]
|
||||
|
@ -168,6 +168,10 @@ define i1 @mixed_logical_icmp(<4 x i32> %x) {
|
||||
ret i1 %s3
|
||||
}
|
||||
|
||||
; TODO: This is better than all-scalar and still safe,
|
||||
; but we want this to be 2 reductions with glue
|
||||
; logic...or a wide reduction?
|
||||
|
||||
define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
|
||||
; CHECK-LABEL: @logical_and_icmp_clamp(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3
|
||||
@ -181,10 +185,10 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
|
||||
; CHECK-NEXT: [[D3:%.*]] = icmp sgt i32 [[TMP1]], 17
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP7]], [[D0]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP8]], [[D1]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP9]], [[D2]]
|
||||
; CHECK-NEXT: [[S7:%.*]] = select i1 [[TMP10]], i1 [[D3]], i1 false
|
||||
; CHECK-NEXT: [[S4:%.*]] = select i1 [[TMP7]], i1 [[D0]], i1 false
|
||||
; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
|
||||
; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
|
||||
; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
|
||||
; CHECK-NEXT: ret i1 [[S7]]
|
||||
;
|
||||
%x0 = extractelement <4 x i32> %x, i32 0
|
||||
|
Loading…
Reference in New Issue
Block a user