mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
[X86][AVX] IsElementEquivalent - add matchShuffleWithUNPCK + VBROADCAST/VBROADCAST_LOAD handling
Specify LHS/RHS operands in matchShuffleWithUNPCK's calls to isTargetShuffleEquivalent, and handle VBROADCAST/VBROADCAST_LOAD matching in IsElementEquivalent
This commit is contained in:
parent
5da799cc4f
commit
2df4fafdf1
@@ -10960,6 +10960,11 @@ static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp,
         MaskSize == (int)ExpectedOp.getNumOperands())
       return Op.getOperand(Idx) == ExpectedOp.getOperand(ExpectedIdx);
     break;
+  case X86ISD::VBROADCAST:
+  case X86ISD::VBROADCAST_LOAD:
+    // TODO: Handle MaskSize != Op.getValueType().getVectorNumElements()?
+    return (Op == ExpectedOp &&
+            Op.getValueType().getVectorNumElements() == MaskSize);
   case X86ISD::HADD:
   case X86ISD::HSUB:
   case X86ISD::FHADD:
@@ -11321,7 +11326,8 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
   // Attempt to match the target mask against the unpack lo/hi mask patterns.
   SmallVector<int, 64> Unpckl, Unpckh;
   createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);
-  if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) {
+  if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, V1,
+                                (IsUnary ? V1 : V2))) {
     UnpackOpcode = X86ISD::UNPCKL;
     V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
     V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
@@ -11329,7 +11335,8 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
   }

   createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);
-  if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) {
+  if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, V1,
+                                (IsUnary ? V1 : V2))) {
     UnpackOpcode = X86ISD::UNPCKH;
     V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
     V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
@@ -2245,7 +2245,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
 ; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm9 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; AVX2-NEXT:    vpbroadcastw %xmm8, %xmm8
 ; AVX2-NEXT:    vpbroadcastw %xmm9, %xmm0
-; AVX2-NEXT:    vpblendw {{.*#+}} xmm8 = xmm0[0,1,2,3,4,5,6],xmm8[7]
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm8 = xmm0[0],xmm8[0],xmm0[1],xmm8[1],xmm0[2],xmm8[2],xmm0[3],xmm8[3]
 ; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm13[0],xmm12[0],xmm13[1],xmm12[1],xmm13[2],xmm12[2],xmm13[3],xmm12[3],xmm13[4],xmm12[4],xmm13[5],xmm12[5],xmm13[6],xmm12[6],xmm13[7],xmm12[7]
 ; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm9 = xmm15[0],xmm14[0],xmm15[1],xmm14[1],xmm15[2],xmm14[2],xmm15[3],xmm14[3],xmm15[4],xmm14[4],xmm15[5],xmm14[5],xmm15[6],xmm14[6],xmm15[7],xmm14[7]
 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
@@ -4230,11 +4230,10 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask6(<8 x doub
 define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask7(<8 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
 ; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mem_mask7:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vbroadcastsd 40(%rdi), %ymm2
-; CHECK-NEXT:    vblendpd $5, (%rdi), %ymm2, %ymm2 # ymm2 = mem[0],ymm2[1],mem[2],ymm2[3]
+; CHECK-NEXT:    vmovapd (%rdi), %ymm2
 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
 ; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm1, %k1
-; CHECK-NEXT:    vmovapd %ymm2, %ymm0 {%k1}
+; CHECK-NEXT:    vunpcklpd 40(%rdi){1to4}, %ymm2, %ymm0 {%k1}
 ; CHECK-NEXT:    retq
 %vec = load <8 x double>, <8 x double>* %vp
 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 5>
@@ -4246,11 +4245,10 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask7(<8 x double
 define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask7(<8 x double>* %vp, <4 x double> %mask) {
 ; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mem_mask7:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vbroadcastsd 40(%rdi), %ymm1
-; CHECK-NEXT:    vblendpd $5, (%rdi), %ymm1, %ymm1 # ymm1 = mem[0],ymm1[1],mem[2],ymm1[3]
+; CHECK-NEXT:    vmovapd (%rdi), %ymm1
 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
 ; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm0, %k1
-; CHECK-NEXT:    vmovapd %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT:    vunpcklpd 40(%rdi){1to4}, %ymm1, %ymm0 {%k1} {z}
 ; CHECK-NEXT:    retq
 %vec = load <8 x double>, <8 x double>* %vp
 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 5>
|
Loading…
Reference in New Issue
Block a user