mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 02:52:53 +02:00
[X86][AVX] combineBitcastvxi1 - peek through bitops to determine size of original vector
Previously we only tested for direct SETCC results; this change lets us peek through AND/OR/XOR combinations of the comparison results as well. A SEXT(PACKSS) fold is still missing for the v8i1 cases, which I need to investigate before I can enable this there too. llvm-svn: 361716
This commit is contained in:
parent
799c41b22f
commit
854db1c5c3
@ -34126,6 +34126,21 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
|||||||
EltNo);
|
EltNo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper to peek through bitops/setcc to determine size of source vector.
|
||||||
|
// Allows combineBitcastvxi1 to determine what size vector generated a <X x i1>.
|
||||||
|
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) {
|
||||||
|
switch (Src.getOpcode()) {
|
||||||
|
case ISD::SETCC:
|
||||||
|
return Src.getOperand(0).getValueSizeInBits() == Size;
|
||||||
|
case ISD::AND:
|
||||||
|
case ISD::XOR:
|
||||||
|
case ISD::OR:
|
||||||
|
return checkBitcastSrcVectorSize(Src.getOperand(0), Size) &&
|
||||||
|
checkBitcastSrcVectorSize(Src.getOperand(1), Size);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Try to match patterns such as
|
// Try to match patterns such as
|
||||||
// (i16 bitcast (v16i1 x))
|
// (i16 bitcast (v16i1 x))
|
||||||
// ->
|
// ->
|
||||||
@ -34174,10 +34189,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
|
|||||||
SExtVT = MVT::v4i32;
|
SExtVT = MVT::v4i32;
|
||||||
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
|
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
|
||||||
// sign-extend to a 256-bit operation to avoid truncation.
|
// sign-extend to a 256-bit operation to avoid truncation.
|
||||||
if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
|
if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256))
|
||||||
Src.getOperand(0).getValueType().is256BitVector()) {
|
|
||||||
SExtVT = MVT::v4i64;
|
SExtVT = MVT::v4i64;
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case MVT::v8i1:
|
case MVT::v8i1:
|
||||||
SExtVT = MVT::v8i16;
|
SExtVT = MVT::v8i16;
|
||||||
@ -34186,6 +34199,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
|
|||||||
// If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
|
// If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
|
||||||
// 256-bit because the shuffle is cheaper than sign extending the result of
|
// 256-bit because the shuffle is cheaper than sign extending the result of
|
||||||
// the compare.
|
// the compare.
|
||||||
|
// TODO : use checkBitcastSrcVectorSize
|
||||||
if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
|
if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
|
||||||
(Src.getOperand(0).getValueType().is256BitVector() ||
|
(Src.getOperand(0).getValueType().is256BitVector() ||
|
||||||
Src.getOperand(0).getValueType().is512BitVector())) {
|
Src.getOperand(0).getValueType().is512BitVector())) {
|
||||||
|
@ -55,18 +55,18 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
|
|||||||
;
|
;
|
||||||
; AVX1-LABEL: v4i64:
|
; AVX1-LABEL: v4i64:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
|
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
|
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
|
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
||||||
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
|
||||||
; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vandpd %ymm1, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vmovmskps %xmm0, %eax
|
; AVX1-NEXT: vmovmskpd %ymm0, %eax
|
||||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
@ -76,9 +76,7 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
|
|||||||
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
|
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
|
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
|
||||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
; AVX2-NEXT: vmovmskpd %ymm0, %eax
|
||||||
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vmovmskps %xmm0, %eax
|
|
||||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
@ -126,9 +124,7 @@ define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double>
|
|||||||
; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
|
; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
|
||||||
; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm1
|
; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm1
|
||||||
; AVX12-NEXT: vandpd %ymm1, %ymm0, %ymm0
|
; AVX12-NEXT: vandpd %ymm1, %ymm0, %ymm0
|
||||||
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1
|
; AVX12-NEXT: vmovmskpd %ymm0, %eax
|
||||||
; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX12-NEXT: vmovmskps %xmm0, %eax
|
|
||||||
; AVX12-NEXT: # kill: def $al killed $al killed $eax
|
; AVX12-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX12-NEXT: vzeroupper
|
; AVX12-NEXT: vzeroupper
|
||||||
; AVX12-NEXT: retq
|
; AVX12-NEXT: retq
|
||||||
|
Loading…
Reference in New Issue
Block a user