mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[x86] be more selective about converting 'and' to shuffle (PR37749)
isVectorClearMaskLegal() is the TLI hook used by the generic DAGCombiner::XformToShuffleWithZero(). We've grown to accommodate/expect this transform to shuffle (disabling it more generally results in many regressions). So I'm narrowly excluding the 256-bit types that clearly are not worthwhile for AVX1. I think in most cases we are able to recover by converting the shuffle back into 'and' ops, but the cases in: https://bugs.llvm.org/show_bug.cgi?id=37749 ...show that there are cracks. llvm-svn: 334759
This commit is contained in:
parent
bb91ae466c
commit
41b45b2b8c
@ -26245,6 +26245,12 @@ bool X86TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
|
||||
bool
|
||||
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
|
||||
EVT VT) const {
|
||||
// Don't convert an 'and' into a shuffle that we don't directly support.
|
||||
// vpblendw and vpshufb for 256-bit vectors are not available on AVX1.
|
||||
if (!Subtarget.hasAVX2())
|
||||
if (VT == MVT::v32i8 || VT == MVT::v16i16)
|
||||
return false;
|
||||
|
||||
// Just delegate to the generic legality, clear masks aren't special.
|
||||
return isShuffleMaskLegal(Mask, VT);
|
||||
}
|
||||
|
@ -310,17 +310,17 @@ define <4 x i64> @and_xor_splat1_v4i64(<4 x i64> %x) nounwind {
|
||||
define <8 x i32> @and_disguised_i8_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
|
||||
; AVX1-LABEL: and_disguised_i8_elts:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm4, %xmm3
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm3
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1095216660735,1095216660735]
|
||||
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
|
||||
; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; INT256-LABEL: and_disguised_i8_elts:
|
||||
@ -396,17 +396,17 @@ define <8 x i32> @xor_disguised_i8_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z
|
||||
define <8 x i32> @and_disguised_i16_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
|
||||
; AVX1-LABEL: and_disguised_i16_elts:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm4, %xmm3
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm3
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
|
||||
; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0],xmm1[1],xmm3[2],xmm1[3],xmm3[4],xmm1[5],xmm3[6],xmm1[7]
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; INT256-LABEL: and_disguised_i16_elts:
|
||||
|
Loading…
x
Reference in New Issue
Block a user