1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[X86][XOP] Shuffle v16i8 using VPPERM(X,Y) instead of OR(PSHUFB(X),PSHUFB(Y))

This commit is contained in:
Simon Pilgrim 2020-07-28 13:08:22 +01:00
parent 71af86f9f7
commit 51f8ba9b43
5 changed files with 41 additions and 43 deletions

View File

@ -15057,6 +15057,12 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (Subtarget.hasVBMI() && Subtarget.hasVLX())
return lowerShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG);
// If we have XOP we can use one VPPERM instead of multiple PSHUFBs.
if (Subtarget.hasXOP()) {
SDValue MaskNode = getConstVector(Mask, MVT::v16i8, DAG, DL, true);
return DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, V1, V2, MaskNode);
}
// Use PALIGNR+Permute if possible - permute might become PSHUFB but the
// PALIGNR will be cheaper than the second PSHUFB+OR.
if (SDValue V = lowerShuffleAsByteRotateAndPermute(

View File

@ -775,17 +775,11 @@ define void @interleave_24i8_out(<24 x i8>* %p, <8 x i8>* %q1, <8 x i8>* %q2, <8
;
; XOP-LABEL: interleave_24i8_out:
; XOP: # %bb.0:
; XOP-NEXT: vmovdqu (%rdi), %xmm0
; XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; XOP-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,xmm1[2,5,u,u,u,u,u,u,u,u]
; XOP-NEXT: vpshufb {{.*#+}} xmm3 = xmm0[0,3,6,9,12,15],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; XOP-NEXT: vpor %xmm2, %xmm3, %xmm2
; XOP-NEXT: vpshufb {{.*#+}} xmm3 = zero,zero,zero,zero,zero,xmm1[0,3,6,u,u,u,u,u,u,u,u]
; XOP-NEXT: vpshufb {{.*#+}} xmm4 = xmm0[1,4,7,10,13],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; XOP-NEXT: vpor %xmm3, %xmm4, %xmm3
; XOP-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,xmm1[1,4,7,u,u,u,u,u,u,u,u]
; XOP-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,5,8,11,14],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; XOP-NEXT: vmovdqu (%rdi), %xmm1
; XOP-NEXT: vpperm {{.*#+}} xmm2 = xmm1[0,3,6,9,12,15],xmm0[2,5],xmm1[u,u,u,u,u,u,u,u]
; XOP-NEXT: vpperm {{.*#+}} xmm3 = xmm1[1,4,7,10,13],xmm0[0,3,6],xmm1[u,u,u,u,u,u,u,u]
; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm1[2,5,8,11,14],xmm0[1,4,7],xmm1[u,u,u,u,u,u,u,u]
; XOP-NEXT: vmovq %xmm2, (%rsi)
; XOP-NEXT: vmovq %xmm3, (%rdx)
; XOP-NEXT: vmovq %xmm0, (%rcx)
@ -883,12 +877,8 @@ define void @interleave_24i8_in(<24 x i8>* %p, <8 x i8>* %q1, <8 x i8>* %q2, <8
; XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; XOP-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; XOP-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,8],zero,xmm0[1,9],zero,xmm0[2,10],zero,xmm0[3,11],zero,xmm0[4,12],zero,xmm0[5]
; XOP-NEXT: vpshufb {{.*#+}} xmm3 = zero,zero,xmm1[0],zero,zero,xmm1[1],zero,zero,xmm1[2],zero,zero,xmm1[3],zero,zero,xmm1[4],zero
; XOP-NEXT: vpor %xmm3, %xmm2, %xmm2
; XOP-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[13],zero,xmm0[6,14],zero,xmm0[7,15],zero,xmm0[u,u,u,u,u,u,u,u]
; XOP-NEXT: vpshufb {{.*#+}} xmm1 = zero,xmm1[5],zero,zero,xmm1[6],zero,zero,xmm1[7,u,u,u,u,u,u,u,u]
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOP-NEXT: vpperm {{.*#+}} xmm2 = xmm0[0,8],xmm1[0],xmm0[1,9],xmm1[1],xmm0[2,10],xmm1[2],xmm0[3,11],xmm1[3],xmm0[4,12],xmm1[4],xmm0[5]
; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[13],xmm1[5],xmm0[6,14],xmm1[6],xmm0[7,15],xmm1[7],xmm0[u,u,u,u,u,u,u,u]
; XOP-NEXT: vmovq %xmm0, 16(%rdi)
; XOP-NEXT: vmovdqu %xmm2, (%rdi)
; XOP-NEXT: retq

View File

@ -1577,12 +1577,19 @@ define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
; AVX1OR2: # %bb.0: # %entry
; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
; AVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1OR2-NEXT: retq
; AVX1-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VLBW-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
; AVX512VLBW: # %bb.0: # %entry
@ -1596,6 +1603,11 @@ define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(
; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = <u,10,2,7,22,14,7,2,18,3,1,14,18,9,11,0>
; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0
; AVX512VLVBMI-NEXT: retq
;
; XOP-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
; XOP: # %bb.0: # %entry
; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[u,10,2,7],xmm1[6],xmm0[14,7,2],xmm1[2],xmm0[3,1,14],xmm1[2],xmm0[9,11,0]
; XOP-NEXT: retq
entry:
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 undef, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0>

View File

@ -3407,20 +3407,14 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_
;
; XOPAVX1-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm2[4,3,u,3,u,u,u,u,u,u,u],xmm0[7],xmm2[u,u,u,u]
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm4[u,u,4,u,1,6],zero,zero,xmm4[0],zero,xmm4[11,u],zero,zero,zero,zero
; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm6 = xmm1[u,u],zero,xmm1[u],zero,zero,xmm1[5,0],zero,xmm1[10],zero,xmm1[u,4,2,4,7]
; XOPAVX1-NEXT: vpor %xmm5, %xmm6, %xmm5
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm1[u,u],xmm2[4],xmm1[u],xmm2[1,6],xmm1[5,0],xmm2[0],xmm1[10],xmm2[11],xmm1[u,4,2,4,7]
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm5 = xmm4[4,3,u,3,u,u,u,u,u,u,u],xmm0[7],xmm4[u,u,u,u]
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
; XOPAVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm3, %xmm3
; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm4 = zero,zero,xmm4[u,u],zero,zero,xmm4[12],zero,xmm4[u,u,u],zero,zero,xmm4[u,0,3]
; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],zero,xmm1[8,u,u,u,12,1,u],zero,zero
; XOPAVX1-NEXT: vpor %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u],zero,zero,xmm2[u,u,u,u,1,6,13,u,u],zero,xmm2[u,u]
; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u]
; XOPAVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpblendvb %xmm6, %xmm3, %xmm5, %xmm3
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],xmm2[12],xmm1[8,u,u,u,12,1,u],xmm2[0,3]
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],xmm4[1,6,13],xmm0[u,u,12,u,u]
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255]
; XOPAVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0

View File

@ -26,13 +26,9 @@ define <32 x i8> @foo(<48 x i8>* %x0) {
; XOP-NEXT: vmovdqu (%rdi), %xmm0
; XOP-NEXT: vmovdqu 16(%rdi), %xmm1
; XOP-NEXT: vmovdqu 32(%rdi), %xmm2
; XOP-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[8,9,11,12,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; XOP-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,xmm2[1,2,4,5,7,8,10,11,13,14]
; XOP-NEXT: vpor %xmm3, %xmm2, %xmm2
; XOP-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,2,3,5,6]
; XOP-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,3,4,6,7,9,10,12,13,15],zero,zero,zero,zero,zero
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,3,4,6,7,9,10,12,13,15],xmm1[0,2,3,5,6]
; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm1[8,9,11,12,14,15],xmm2[1,2,4,5,7,8,10,11,13,14]
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOP-NEXT: retq
;
; AVX2-LABEL: foo: