1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[X86][AVX] combineBitcastvxi1 - peek through bitops to determine size of original vector

We were only testing for direct SETCC results - this allows us to peek through AND/OR/XOR combinations of the comparison results as well.

There's a missing SEXT(PACKSS) fold that I need to investigate for v8i1 cases before I can enable it there as well.

llvm-svn: 361716
This commit is contained in:
Simon Pilgrim 2019-05-26 10:54:23 +00:00
parent 799c41b22f
commit 854db1c5c3
2 changed files with 29 additions and 19 deletions

View File

@ -34126,6 +34126,21 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
EltNo); EltNo);
} }
// Recursively inspect Src to decide whether every comparison feeding it was
// performed on vectors of exactly Size bits. combineBitcastvxi1 uses this to
// recover the width of the vector that produced a <X x i1> result, even when
// the setcc results have been combined with logical bitops.
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) {
  unsigned Opc = Src.getOpcode();
  if (Opc == ISD::SETCC)
    return Src.getOperand(0).getValueSizeInBits() == Size;
  // Logic ops preserve the i1 element count; both inputs must originate from
  // same-sized comparisons for the answer to hold.
  if (Opc == ISD::AND || Opc == ISD::XOR || Opc == ISD::OR)
    return checkBitcastSrcVectorSize(Src.getOperand(0), Size) &&
           checkBitcastSrcVectorSize(Src.getOperand(1), Size);
  // Any other producer: we can't see through it, so report a mismatch.
  return false;
}
// Try to match patterns such as // Try to match patterns such as
// (i16 bitcast (v16i1 x)) // (i16 bitcast (v16i1 x))
// -> // ->
@ -34174,10 +34189,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
SExtVT = MVT::v4i32; SExtVT = MVT::v4i32;
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2)) // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
// sign-extend to a 256-bit operation to avoid truncation. // sign-extend to a 256-bit operation to avoid truncation.
if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() && if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256))
Src.getOperand(0).getValueType().is256BitVector()) {
SExtVT = MVT::v4i64; SExtVT = MVT::v4i64;
}
break; break;
case MVT::v8i1: case MVT::v8i1:
SExtVT = MVT::v8i16; SExtVT = MVT::v8i16;
@ -34186,6 +34199,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
// If the setcc operand is 128-bit, prefer sign-extending to 128-bit over // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
// 256-bit because the shuffle is cheaper than sign extending the result of // 256-bit because the shuffle is cheaper than sign extending the result of
// the compare. // the compare.
// TODO : use checkBitcastSrcVectorSize
if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() && if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
(Src.getOperand(0).getValueType().is256BitVector() || (Src.getOperand(0).getValueType().is256BitVector() ||
Src.getOperand(0).getValueType().is512BitVector())) { Src.getOperand(0).getValueType().is512BitVector())) {

View File

@ -55,18 +55,18 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
; ;
; AVX1-LABEL: v4i64: ; AVX1-LABEL: v4i64:
; AVX1: # %bb.0: ; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vandpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %xmm0, %eax ; AVX1-NEXT: vmovmskpd %ymm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper ; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq ; AVX1-NEXT: retq
@ -76,9 +76,7 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovmskps %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper ; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq ; AVX2-NEXT: retq
@ -126,9 +124,7 @@ define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double>
; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 ; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm1 ; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm1
; AVX12-NEXT: vandpd %ymm1, %ymm0, %ymm0 ; AVX12-NEXT: vandpd %ymm1, %ymm0, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX12-NEXT: vmovmskpd %ymm0, %eax
; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vmovmskps %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: vzeroupper ; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq ; AVX12-NEXT: retq