mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[DAG] Simplify OR(X,SHL(Y,BW/2)) eq/ne 0/-1 'all/any-of' style patterns
Attempt to simplify all/any-of style patterns that concatenate 2 smaller integers together into an and(x,y)/or(x,y) + icmp 0/-1 instead. This is mainly to help some bool predicate reduction patterns where we end up concatenating bool vectors that have been bitcasted to integers. Differential Revision: https://reviews.llvm.org/D93599
This commit is contained in:
parent
88551a2c36
commit
ed64f8bab5
@ -3956,6 +3956,67 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
|
||||
if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
|
||||
VT, N0, N1, Cond, DCI, dl))
|
||||
return CC;
|
||||
|
||||
// For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
|
||||
// For example, when the high 32 bits of i64 X are known to be clear:
|
||||
// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
|
||||
// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
|
||||
bool CmpZero = N1C->getAPIntValue().isNullValue();
|
||||
bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
|
||||
if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
|
||||
// Match or(lo,shl(hi,bw/2)) pattern.
|
||||
auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
|
||||
unsigned EltBits = V.getScalarValueSizeInBits();
|
||||
if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
|
||||
return false;
|
||||
SDValue LHS = V.getOperand(0);
|
||||
SDValue RHS = V.getOperand(1);
|
||||
APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
|
||||
// Unshifted element must have zero upper bits.
|
||||
if (RHS.getOpcode() == ISD::SHL &&
|
||||
isa<ConstantSDNode>(RHS.getOperand(1)) &&
|
||||
RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
|
||||
DAG.MaskedValueIsZero(LHS, HiBits)) {
|
||||
Lo = LHS;
|
||||
Hi = RHS.getOperand(0);
|
||||
return true;
|
||||
}
|
||||
if (LHS.getOpcode() == ISD::SHL &&
|
||||
isa<ConstantSDNode>(LHS.getOperand(1)) &&
|
||||
LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
|
||||
DAG.MaskedValueIsZero(RHS, HiBits)) {
|
||||
Lo = RHS;
|
||||
Hi = LHS.getOperand(0);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
|
||||
unsigned EltBits = N0.getScalarValueSizeInBits();
|
||||
unsigned HalfBits = EltBits / 2;
|
||||
APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
|
||||
SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
|
||||
SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
|
||||
SDValue NewN0 =
|
||||
DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
|
||||
SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
|
||||
return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
|
||||
};
|
||||
|
||||
SDValue Lo, Hi;
|
||||
if (IsConcat(N0, Lo, Hi))
|
||||
return MergeConcat(Lo, Hi);
|
||||
|
||||
if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
|
||||
SDValue Lo0, Lo1, Hi0, Hi1;
|
||||
if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
|
||||
IsConcat(N0.getOperand(1), Lo1, Hi1)) {
|
||||
return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
|
||||
DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we have "setcc X, C0", check to see if we can shrink the immediate
|
||||
|
@ -2148,18 +2148,15 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
||||
;
|
||||
; KNL-LABEL: ktest_2:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vcmpgtps 64(%rdi), %zmm1, %k1
|
||||
; KNL-NEXT: vcmpgtps (%rdi), %zmm0, %k2
|
||||
; KNL-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
|
||||
; KNL-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
|
||||
; KNL-NEXT: vcmpltps %zmm3, %zmm1, %k0
|
||||
; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k3
|
||||
; KNL-NEXT: vcmpgtps (%rdi), %zmm0, %k1
|
||||
; KNL-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2
|
||||
; KNL-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
|
||||
; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
|
||||
; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0
|
||||
; KNL-NEXT: vcmpltps %zmm2, %zmm1, %k3
|
||||
; KNL-NEXT: korw %k3, %k2, %k2
|
||||
; KNL-NEXT: kmovw %k2, %eax
|
||||
; KNL-NEXT: korw %k0, %k1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: shll $16, %ecx
|
||||
; KNL-NEXT: orl %eax, %ecx
|
||||
; KNL-NEXT: kortestw %k2, %k0
|
||||
; KNL-NEXT: je LBB45_2
|
||||
; KNL-NEXT: ## %bb.1: ## %L1
|
||||
; KNL-NEXT: vmovaps %zmm0, (%rdi)
|
||||
@ -2220,18 +2217,15 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
||||
;
|
||||
; AVX512DQ-LABEL: ktest_2:
|
||||
; AVX512DQ: ## %bb.0:
|
||||
; AVX512DQ-NEXT: vcmpgtps 64(%rdi), %zmm1, %k1
|
||||
; AVX512DQ-NEXT: vcmpgtps (%rdi), %zmm0, %k2
|
||||
; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
|
||||
; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
|
||||
; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm1, %k0
|
||||
; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm0, %k3
|
||||
; AVX512DQ-NEXT: vcmpgtps (%rdi), %zmm0, %k1
|
||||
; AVX512DQ-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2
|
||||
; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
|
||||
; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
|
||||
; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm0, %k0
|
||||
; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm1, %k3
|
||||
; AVX512DQ-NEXT: korw %k3, %k2, %k2
|
||||
; AVX512DQ-NEXT: kmovw %k2, %eax
|
||||
; AVX512DQ-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, %ecx
|
||||
; AVX512DQ-NEXT: shll $16, %ecx
|
||||
; AVX512DQ-NEXT: orl %eax, %ecx
|
||||
; AVX512DQ-NEXT: kortestw %k2, %k0
|
||||
; AVX512DQ-NEXT: je LBB45_2
|
||||
; AVX512DQ-NEXT: ## %bb.1: ## %L1
|
||||
; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi)
|
||||
@ -4861,15 +4855,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
|
||||
; KNL-NEXT: vpcmpeqw %ymm5, %ymm3, %ymm3
|
||||
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
|
||||
; KNL-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2
|
||||
; KNL-NEXT: vpmovsxwd %ymm2, %zmm0
|
||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm0
|
||||
; KNL-NEXT: vpor %ymm0, %ymm2, %ymm0
|
||||
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: shll $16, %ecx
|
||||
; KNL-NEXT: orl %eax, %ecx
|
||||
; KNL-NEXT: kortestw %k0, %k0
|
||||
; KNL-NEXT: je LBB77_1
|
||||
; KNL-NEXT: ## %bb.2: ## %exit
|
||||
; KNL-NEXT: vzeroupper
|
||||
@ -4945,15 +4936,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
|
||||
; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
|
||||
; AVX512DQ-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2
|
||||
; AVX512DQ-NEXT: vpmovsxwd %ymm2, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
|
||||
; AVX512DQ-NEXT: vpor %ymm0, %ymm2, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, %ecx
|
||||
; AVX512DQ-NEXT: shll $16, %ecx
|
||||
; AVX512DQ-NEXT: orl %eax, %ecx
|
||||
; AVX512DQ-NEXT: kortestw %k0, %k0
|
||||
; AVX512DQ-NEXT: je LBB77_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
@ -5027,11 +5015,10 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
|
||||
; KNL-NEXT: vpcmpeqb %ymm5, %ymm3, %ymm3
|
||||
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
|
||||
; KNL-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2
|
||||
; KNL-NEXT: vpmovmskb %ymm2, %eax
|
||||
; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm0
|
||||
; KNL-NEXT: vpmovmskb %ymm0, %ecx
|
||||
; KNL-NEXT: shlq $32, %rcx
|
||||
; KNL-NEXT: orq %rax, %rcx
|
||||
; KNL-NEXT: vpor %ymm0, %ymm2, %ymm0
|
||||
; KNL-NEXT: vpmovmskb %ymm0, %eax
|
||||
; KNL-NEXT: testl %eax, %eax
|
||||
; KNL-NEXT: je LBB78_1
|
||||
; KNL-NEXT: ## %bb.2: ## %exit
|
||||
; KNL-NEXT: vzeroupper
|
||||
@ -5107,11 +5094,10 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
|
||||
; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
|
||||
; AVX512DQ-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2
|
||||
; AVX512DQ-NEXT: vpmovmskb %ymm2, %eax
|
||||
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovmskb %ymm0, %ecx
|
||||
; AVX512DQ-NEXT: shlq $32, %rcx
|
||||
; AVX512DQ-NEXT: orq %rax, %rcx
|
||||
; AVX512DQ-NEXT: vpor %ymm0, %ymm2, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovmskb %ymm0, %eax
|
||||
; AVX512DQ-NEXT: testl %eax, %eax
|
||||
; AVX512DQ-NEXT: je LBB78_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
|
@ -4,10 +4,8 @@
|
||||
define i1 @cmp_allbits_concat_i8(i8 %x, i8 %y) {
|
||||
; CHECK-LABEL: cmp_allbits_concat_i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzbl %sil, %eax
|
||||
; CHECK-NEXT: shll $8, %edi
|
||||
; CHECK-NEXT: orl %eax, %edi
|
||||
; CHECK-NEXT: cmpw $-1, %di
|
||||
; CHECK-NEXT: andl %esi, %edi
|
||||
; CHECK-NEXT: cmpb $-1, %dil
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: retq
|
||||
%zx = zext i8 %x to i16
|
||||
@ -21,10 +19,7 @@ define i1 @cmp_allbits_concat_i8(i8 %x, i8 %y) {
|
||||
define i1 @cmp_anybits_concat_i32(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: cmp_anybits_concat_i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: shlq $32, %rdi
|
||||
; CHECK-NEXT: orq %rax, %rdi
|
||||
; CHECK-NEXT: orl %esi, %edi
|
||||
; CHECK-NEXT: setne %al
|
||||
; CHECK-NEXT: retq
|
||||
%zx = zext i32 %x to i64
|
||||
@ -39,9 +34,9 @@ define i1 @cmp_anybits_concat_shl_shl_i16(i16 %x, i16 %y) {
|
||||
; CHECK-LABEL: cmp_anybits_concat_shl_shl_i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; CHECK-NEXT: movzwl %di, %eax
|
||||
; CHECK-NEXT: movzwl %si, %ecx
|
||||
; CHECK-NEXT: shlq $32, %rax
|
||||
; CHECK-NEXT: shlq $8, %rcx
|
||||
; CHECK-NEXT: orq %rax, %rcx
|
||||
; CHECK-NEXT: sete %al
|
||||
@ -59,9 +54,9 @@ define i1 @cmp_anybits_concat_shl_shl_i16_commute(i16 %x, i16 %y) {
|
||||
; CHECK-LABEL: cmp_anybits_concat_shl_shl_i16_commute:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; CHECK-NEXT: movzwl %di, %eax
|
||||
; CHECK-NEXT: movzwl %si, %ecx
|
||||
; CHECK-NEXT: shlq $32, %rax
|
||||
; CHECK-NEXT: shlq $8, %rcx
|
||||
; CHECK-NEXT: orq %rax, %rcx
|
||||
; CHECK-NEXT: sete %al
|
||||
@ -75,6 +70,9 @@ define i1 @cmp_anybits_concat_shl_shl_i16_commute(i16 %x, i16 %y) {
|
||||
ret i1 %r
|
||||
}
|
||||
|
||||
; FIXME: Add vector support, but it's only worth it if we can freely truncate the
|
||||
; concat'd vectors.
|
||||
|
||||
define <16 x i8> @cmp_allbits_concat_v16i8(<16 x i8> %x, <16 x i8> %y) {
|
||||
; CHECK-LABEL: cmp_allbits_concat_v16i8:
|
||||
; CHECK: # %bb.0:
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user