1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[ValueTracking] recognize variations of 'clamp' to improve codegen (PR31693)

By enhancing value tracking, we allow an existing min/max canonicalization to
kick in and improve codegen for several targets that have min/max instructions.

Unfortunately, recognizing min/max in value tracking may cause us to hit
a hack in InstCombiner::visitICmpInst() more often:
http://lists.llvm.org/pipermail/llvm-dev/2017-January/109340.html
...but I'm hoping we can remove that soon.

Correctness proofs based on Alive:

Name: smaxmin
Pre: C1 < C2
%cmp2 = icmp slt i8 %x, C2
%min = select i1 %cmp2, i8 %x, i8 C2
%cmp3 = icmp slt i8 %x, C1
%r = select i1 %cmp3, i8 C1, i8 %min
=>
%cmp2 = icmp slt i8 %x, C2
%min = select i1 %cmp2, i8 %x, i8 C2
%cmp1 = icmp sgt i8 %min, C1
%r = select i1 %cmp1, i8 %min, i8 C1

Name: sminmax
Pre: C1 > C2
%cmp2 = icmp sgt i8 %x, C2
%max = select i1 %cmp2, i8 %x, i8 C2
%cmp3 = icmp sgt i8 %x, C1
%r = select i1 %cmp3, i8 C1, i8 %max
=>
%cmp2 = icmp sgt i8 %x, C2
%max = select i1 %cmp2, i8 %x, i8 C2
%cmp1 = icmp slt i8 %max, C1
%r = select i1 %cmp1, i8 %max, i8 C1

---------------------------------------- 
Optimization: smaxmin 
Done: 1 
Optimization is correct! 
---------------------------------------- 
Optimization: sminmax 
Done: 1 
Optimization is correct!



Name: umaxmin
Pre: C1 u< C2
%cmp2 = icmp ult i8 %x, C2
%min = select i1 %cmp2, i8 %x, i8 C2
%cmp3 = icmp ult i8 %x, C1
%r = select i1 %cmp3, i8 C1, i8 %min
=>
%cmp2 = icmp ult i8 %x, C2
%min = select i1 %cmp2, i8 %x, i8 C2
%cmp1 = icmp ugt i8 %min, C1
%r = select i1 %cmp1, i8 %min, i8 C1

Name: uminmax
Pre: C1 u> C2
%cmp2 = icmp ugt i8 %x, C2
%max = select i1 %cmp2, i8 %x, i8 C2
%cmp3 = icmp ugt i8 %x, C1
%r = select i1 %cmp3, i8 C1, i8 %max
=>
%cmp2 = icmp ugt i8 %x, C2
%max = select i1 %cmp2, i8 %x, i8 C2
%cmp1 = icmp ult i8 %max, C1
%r = select i1 %cmp1, i8 %max, i8 C1

---------------------------------------- 
Optimization: umaxmin 
Done: 1 
Optimization is correct! 
---------------------------------------- 
Optimization: uminmax 
Done: 1 
Optimization is correct! 
 

llvm-svn: 292660
This commit is contained in:
Sanjay Patel 2017-01-20 22:18:47 +00:00
parent 04b1dc5f67
commit fc6ca85023
3 changed files with 55 additions and 27 deletions

View File

@ -3918,6 +3918,45 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
Value *CmpLHS, Value *CmpRHS,
Value *TrueVal, Value *FalseVal,
Value *&LHS, Value *&RHS) {
// Recognize variations of:
// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
const APInt *C1;
if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) {
const APInt *C2;
// (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) &&
C1->slt(*C2) && Pred == CmpInst::ICMP_SLT) {
LHS = TrueVal;
RHS = FalseVal;
return {SPF_SMAX, SPNB_NA, false};
}
// (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) &&
C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT) {
LHS = TrueVal;
RHS = FalseVal;
return {SPF_SMIN, SPNB_NA, false};
}
// (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) &&
C1->ult(*C2) && Pred == CmpInst::ICMP_ULT) {
LHS = TrueVal;
RHS = FalseVal;
return {SPF_UMAX, SPNB_NA, false};
}
// (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) &&
C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT) {
LHS = TrueVal;
RHS = FalseVal;
return {SPF_UMIN, SPNB_NA, false};
}
}
if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
return {SPF_UNKNOWN, SPNB_NA, false};
@ -3941,7 +3980,6 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
}
const APInt *C1;
if (!match(CmpRHS, m_APInt(C1)))
return {SPF_UNKNOWN, SPNB_NA, false};

View File

@ -1,5 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
; These are actually tests of ValueTracking, and so may have test coverage in InstCombine or other
@ -165,10 +164,8 @@ define <4 x i32> @umin_vec2(<4 x i32> %x) {
define <4 x i32> @clamp_signed1(<4 x i32> %x) {
; CHECK-LABEL: clamp_signed1:
; CHECK: # BB#0:
; CHECK-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15]
; CHECK-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp2 = icmp slt <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
%min = select <4 x i1> %cmp2, <4 x i32> %x, <4 x i32><i32 255, i32 255, i32 255, i32 255>
@ -182,10 +179,8 @@ define <4 x i32> @clamp_signed1(<4 x i32> %x) {
define <4 x i32> @clamp_signed2(<4 x i32> %x) {
; CHECK-LABEL: clamp_signed2:
; CHECK: # BB#0:
; CHECK-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255]
; CHECK-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp2 = icmp sgt <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
%max = select <4 x i1> %cmp2, <4 x i32> %x, <4 x i32><i32 15, i32 15, i32 15, i32 15>
@ -199,11 +194,8 @@ define <4 x i32> @clamp_signed2(<4 x i32> %x) {
define <4 x i32> @clamp_unsigned1(<4 x i32> %x) {
; CHECK-LABEL: clamp_unsigned1:
; CHECK: # BB#0:
; CHECK-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483663,2147483663,2147483663,2147483663]
; CHECK-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
; CHECK-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp2 = icmp ult <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
%min = select <4 x i1> %cmp2, <4 x i32> %x, <4 x i32><i32 255, i32 255, i32 255, i32 255>
@ -217,10 +209,8 @@ define <4 x i32> @clamp_unsigned1(<4 x i32> %x) {
define <4 x i32> @clamp_unsigned2(<4 x i32> %x) {
; CHECK-LABEL: clamp_unsigned2:
; CHECK: # BB#0:
; CHECK-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
; CHECK-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp2 = icmp ugt <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
%max = select <4 x i1> %cmp2, <4 x i32> %x, <4 x i32><i32 15, i32 15, i32 15, i32 15>

View File

@ -348,8 +348,8 @@ define i32 @clamp_signed1(i32 %x) {
; CHECK-LABEL: @clamp_signed1(
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %x, 255
; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 %x, i32 255
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %x, 15
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], i32 15, i32 [[MIN]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[MIN]], 15
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15
; CHECK-NEXT: ret i32 [[R]]
;
%cmp2 = icmp slt i32 %x, 255
@ -365,8 +365,8 @@ define i32 @clamp_signed2(i32 %x) {
; CHECK-LABEL: @clamp_signed2(
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 %x, 15
; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 %x, i32 15
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 %x, 255
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], i32 255, i32 [[MAX]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[MAX]], 255
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255
; CHECK-NEXT: ret i32 [[R]]
;
%cmp2 = icmp sgt i32 %x, 15
@ -382,8 +382,8 @@ define i32 @clamp_unsigned1(i32 %x) {
; CHECK-LABEL: @clamp_unsigned1(
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 %x, 255
; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 %x, i32 255
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 %x, 15
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], i32 15, i32 [[MIN]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[MIN]], 15
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15
; CHECK-NEXT: ret i32 [[R]]
;
%cmp2 = icmp ult i32 %x, 255
@ -399,8 +399,8 @@ define i32 @clamp_unsigned2(i32 %x) {
; CHECK-LABEL: @clamp_unsigned2(
; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 %x, 15
; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 %x, i32 15
; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 %x, 255
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], i32 255, i32 [[MAX]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[MAX]], 255
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255
; CHECK-NEXT: ret i32 [[R]]
;
%cmp2 = icmp ugt i32 %x, 15