1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[InstCombine] reassociate diff of sums into sum of diffs

This is the integer sibling to D81491.

(a[0] + a[1] + a[2] + a[3]) - (b[0] + b[1] + b[2] +b[3]) -->
(a[0] - b[0]) + (a[1] - b[1]) + (a[2] - b[2]) + (a[3] - b[3])

Removing the "experimental" from these intrinsics is likely
not too far away.
This commit is contained in:
Sanjay Patel 2020-06-22 20:45:00 -04:00
parent 1e920f102e
commit d6c0ccc19e
3 changed files with 27 additions and 8 deletions

View File

@ -1787,6 +1787,21 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateSub(XZ, YW);
}
auto m_AddRdx = [](Value *&Vec) {
return m_OneUse(
m_Intrinsic<Intrinsic::experimental_vector_reduce_add>(m_Value(Vec)));
};
Value *V0, *V1;
if (match(Op0, m_AddRdx(V0)) && match(Op1, m_AddRdx(V1)) &&
V0->getType() == V1->getType()) {
// Difference of sums is sum of differences:
// add_rdx(V0) - add_rdx(V1) --> add_rdx(V0 - V1)
Value *Sub = Builder.CreateSub(V0, V1);
Value *Rdx = Builder.CreateIntrinsic(
Intrinsic::experimental_vector_reduce_add, {Sub->getType()}, {Sub});
return replaceInstUsesWith(I, Rdx);
}
if (Constant *C = dyn_cast<Constant>(Op0)) {
Value *X;
if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))

View File

@ -88,10 +88,9 @@ define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1,
define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_v4i32(
; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
; CHECK-NEXT: [[R:%.*]] = sub i32 [[R0]], [[R1]]
; CHECK-NEXT: ret i32 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
; CHECK-NEXT: ret i32 [[TMP2]]
;
%r0 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v0)
%r1 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v1)
@ -99,6 +98,8 @@ define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
ret i32 %r
}
; negative test - extra uses could create extra instructions
define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_v4i32_extra_use1(
; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
@ -114,6 +115,8 @@ define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) {
ret i32 %r
}
; negative test - extra uses could create extra instructions
define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_v4i32_extra_use2(
; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
@ -129,6 +132,8 @@ define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) {
ret i32 %r
}
; negative test - can't reassociate different vector types
define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_type_mismatch2(
; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[V0:%.*]])

View File

@ -132,10 +132,9 @@ define i32 @TestVectorsEqual_alt(i32* noalias %Vec0, i32* noalias %Vec1, i32 %To
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[VEC1:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: [[ADD_3:%.*]] = sub i32 [[TMP4]], [[TMP5]]
; CHECK-NEXT: [[CMP3:%.*]] = icmp ule i32 [[ADD_3]], [[TOLERANCE:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP5]], [[TOLERANCE:%.*]]
; CHECK-NEXT: [[COND:%.*]] = zext i1 [[CMP3]] to i32
; CHECK-NEXT: ret i32 [[COND]]
;