mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[InstCombine] reassociate diff of sums into sum of diffs
This is the integer sibling to D81491: (a[0] + a[1] + a[2] + a[3]) - (b[0] + b[1] + b[2] + b[3]) --> (a[0] - b[0]) + (a[1] - b[1]) + (a[2] - b[2]) + (a[3] - b[3]). Removing the "experimental" from these intrinsics is likely not too far away.
This commit is contained in:
parent
1e920f102e
commit
d6c0ccc19e
@ -1787,6 +1787,21 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
|
||||
return BinaryOperator::CreateSub(XZ, YW);
|
||||
}
|
||||
|
||||
auto m_AddRdx = [](Value *&Vec) {
|
||||
return m_OneUse(
|
||||
m_Intrinsic<Intrinsic::experimental_vector_reduce_add>(m_Value(Vec)));
|
||||
};
|
||||
Value *V0, *V1;
|
||||
if (match(Op0, m_AddRdx(V0)) && match(Op1, m_AddRdx(V1)) &&
|
||||
V0->getType() == V1->getType()) {
|
||||
// Difference of sums is sum of differences:
|
||||
// add_rdx(V0) - add_rdx(V1) --> add_rdx(V0 - V1)
|
||||
Value *Sub = Builder.CreateSub(V0, V1);
|
||||
Value *Rdx = Builder.CreateIntrinsic(
|
||||
Intrinsic::experimental_vector_reduce_add, {Sub->getType()}, {Sub});
|
||||
return replaceInstUsesWith(I, Rdx);
|
||||
}
|
||||
|
||||
if (Constant *C = dyn_cast<Constant>(Op0)) {
|
||||
Value *X;
|
||||
if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
|
||||
|
@ -88,10 +88,9 @@ define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1,
|
||||
|
||||
define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
|
||||
; CHECK-LABEL: @diff_of_sums_v4i32(
|
||||
; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
|
||||
; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
|
||||
; CHECK-NEXT: [[R:%.*]] = sub i32 [[R0]], [[R1]]
|
||||
; CHECK-NEXT: ret i32 [[R]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
|
||||
; CHECK-NEXT: ret i32 [[TMP2]]
|
||||
;
|
||||
%r0 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v0)
|
||||
%r1 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v1)
|
||||
@ -99,6 +98,8 @@ define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
; negative test - extra uses could create extra instructions
|
||||
|
||||
define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) {
|
||||
; CHECK-LABEL: @diff_of_sums_v4i32_extra_use1(
|
||||
; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
|
||||
@ -114,6 +115,8 @@ define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) {
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
; negative test - extra uses could create extra instructions
|
||||
|
||||
define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) {
|
||||
; CHECK-LABEL: @diff_of_sums_v4i32_extra_use2(
|
||||
; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
|
||||
@ -129,6 +132,8 @@ define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) {
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
; negative test - can't reassociate different vector types
|
||||
|
||||
define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) {
|
||||
; CHECK-LABEL: @diff_of_sums_type_mismatch2(
|
||||
; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[V0:%.*]])
|
||||
|
@ -132,10 +132,9 @@ define i32 @TestVectorsEqual_alt(i32* noalias %Vec0, i32* noalias %Vec1, i32 %To
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[VEC1:%.*]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
|
||||
; CHECK-NEXT: [[ADD_3:%.*]] = sub i32 [[TMP4]], [[TMP5]]
|
||||
; CHECK-NEXT: [[CMP3:%.*]] = icmp ule i32 [[ADD_3]], [[TOLERANCE:%.*]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
|
||||
; CHECK-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP5]], [[TOLERANCE:%.*]]
|
||||
; CHECK-NEXT: [[COND:%.*]] = zext i1 [[CMP3]] to i32
|
||||
; CHECK-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
|
Loading…
x
Reference in New Issue
Block a user