[X86][AVX] Adjust AllowBWIVPERMV3 tolerance to account for VariableCrossLaneShuffleDepth
As noticed on D105390, we were hardwiring the depth limit for combining to VPERMI2W/VPERMI2B instructions. Not only was the limit too low, but we also hadn't accounted for slow/fast shuffles via the VariableCrossLaneShuffleDepth control.
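
For illustration, here is a minimal sketch of how the combine gate fits together after this change. The AllowBWIVPERMV3 expression is taken from the diff below; the depth-threshold initialisation and the hasFastVariableCrossLaneShuffle()/hasFastVariablePerLaneShuffle() tuning hooks are assumptions based on the surrounding combineX86ShuffleChain code, not part of this patch:

// Hedged sketch of the depth gating in combineX86ShuffleChain.
// Assumption: slow-shuffle targets require one extra level of shuffle
// depth before variable-mask shuffles are considered profitable.
int VariableCrossLaneShuffleDepth =
    Subtarget.hasFastVariableCrossLaneShuffle() ? 1 : 2;
int VariablePerLaneShuffleDepth =
    Subtarget.hasFastVariablePerLaneShuffle() ? 1 : 2;
bool AllowVariableCrossLaneMask =
    (Depth >= VariableCrossLaneShuffleDepth) || HasVariableMask;
bool AllowVariablePerLaneMask =
    (Depth >= VariablePerLaneShuffleDepth) || HasVariableMask;
// VPERMI2W/VPERMI2B are 3 uops on Skylake and Icelake, so demand two
// extra levels of depth on top of the cross-lane threshold.
bool AllowBWIVPERMV3 =
    (Depth >= (VariableCrossLaneShuffleDepth + 2) || HasVariableMask);

In effect the gate moves from the hardwired Depth >= 2 to Depth >= 3 on fast-shuffle targets and Depth >= 4 on slow-shuffle ones, which is why several vpermt2w/vpermi2b/vpermt2b patterns in the tests below revert to cheaper blend/shift sequences.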
parent fcfcb73f87
commit 2aaf1231f6
@@ -36173,7 +36173,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
       (Depth >= VariablePerLaneShuffleDepth) || HasVariableMask;
   // VPERMI2W/VPERMI2B are 3 uops on Skylake and Icelake so we require a
   // higher depth before combining them.
-  bool AllowBWIVPERMV3 = (Depth >= 2 || HasVariableMask);
+  bool AllowBWIVPERMV3 =
+      (Depth >= (VariableCrossLaneShuffleDepth + 2) || HasVariableMask);
 
   bool MaskContainsZeros = isAnyZero(Mask);
 
@@ -5,8 +5,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512
 
 define <2 x i64> @insert_v2i64_x1(<2 x i64> %a) {
 ; SSE2-LABEL: insert_v2i64_x1:
@@ -297,21 +297,14 @@ define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX2-NEXT: retq
 ;
-; AVX512F-LABEL: insert_v16i16_x12345x789ABCDEx:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [32,1,2,3,4,5,38,7,8,9,10,11,12,13,14,47]
-; AVX512F-NEXT: vpermt2w %zmm1, %zmm2, %zmm0
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: insert_v16i16_x12345x789ABCDEx:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [16,1,2,3,4,5,22,7,8,9,10,11,12,13,14,31]
-; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: insert_v16i16_x12345x789ABCDEx:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
+; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
+; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512-NEXT: retq
 %1 = insertelement <16 x i16> %a, i16 -1, i32 0
 %2 = insertelement <16 x i16> %1, i16 -1, i32 6
 %3 = insertelement <16 x i16> %2, i16 -1, i32 15
@@ -1736,54 +1736,30 @@ define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind "min-legal-vector-width"="256" {
 }
 
 define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind "min-legal-vector-width"="256" {
-; CHECK-AVX512-LABEL: constant_rotate_v32i8:
-; CHECK-AVX512: # %bb.0:
-; CHECK-AVX512-NEXT: vpsllw $4, %ymm0, %ymm1
-; CHECK-AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; CHECK-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
-; CHECK-AVX512-NEXT: # ymm2 = mem[0,1,0,1]
-; CHECK-AVX512-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
-; CHECK-AVX512-NEXT: vpsllw $2, %ymm1, %ymm3
-; CHECK-AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; CHECK-AVX512-NEXT: vpaddb %ymm2, %ymm2, %ymm2
-; CHECK-AVX512-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
-; CHECK-AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm3
-; CHECK-AVX512-NEXT: vpaddb %ymm2, %ymm2, %ymm2
-; CHECK-AVX512-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
-; CHECK-AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-AVX512-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
-; CHECK-AVX512-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; CHECK-AVX512-NEXT: vpsrlw $8, %ymm3, %ymm3
-; CHECK-AVX512-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
-; CHECK-AVX512-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; CHECK-AVX512-NEXT: vpsrlw $8, %ymm0, %ymm0
-; CHECK-AVX512-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
-; CHECK-AVX512-NEXT: vpor %ymm0, %ymm1, %ymm0
-; CHECK-AVX512-NEXT: retq
-;
-; CHECK-VBMI-LABEL: constant_rotate_v32i8:
-; CHECK-VBMI: # %bb.0:
-; CHECK-VBMI-NEXT: vpsllw $4, %ymm0, %ymm1
-; CHECK-VBMI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; CHECK-VBMI-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
-; CHECK-VBMI-NEXT: # ymm2 = mem[0,1,0,1]
-; CHECK-VBMI-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
-; CHECK-VBMI-NEXT: vpsllw $2, %ymm1, %ymm3
-; CHECK-VBMI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; CHECK-VBMI-NEXT: vpaddb %ymm2, %ymm2, %ymm2
-; CHECK-VBMI-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
-; CHECK-VBMI-NEXT: vpaddb %ymm1, %ymm1, %ymm3
-; CHECK-VBMI-NEXT: vpaddb %ymm2, %ymm2, %ymm2
-; CHECK-VBMI-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
-; CHECK-VBMI-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-VBMI-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
-; CHECK-VBMI-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; CHECK-VBMI-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
-; CHECK-VBMI-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; CHECK-VBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15,33,35,37,39,41,43,45,47,17,19,21,23,25,27,29,31,49,51,53,55,57,59,61,63]
-; CHECK-VBMI-NEXT: vpermi2b %ymm3, %ymm0, %ymm2
-; CHECK-VBMI-NEXT: vpor %ymm2, %ymm1, %ymm0
-; CHECK-VBMI-NEXT: retq
+; CHECK-LABEL: constant_rotate_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllw $4, %ymm0, %ymm1
+; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
+; CHECK-NEXT: # ymm2 = mem[0,1,0,1]
+; CHECK-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vpsllw $2, %ymm1, %ymm3
+; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; CHECK-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
+; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm3
+; CHECK-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
+; CHECK-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; CHECK-NEXT: vpsrlw $8, %ymm3, %ymm3
+; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
+; CHECK-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: vpsrlw $8, %ymm0, %ymm0
+; CHECK-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
+; CHECK-NEXT: vpor %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
 %shl = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>
 %lshr = lshr <32 x i8> %a, <i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
 %or = or <32 x i8> %shl, %lshr
@@ -1643,25 +1643,12 @@ define <16 x i8> @shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30
 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512VLBW-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
-; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512VLBW-NEXT: retq
-;
-; AVX512VLVBMI-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
-; AVX512VLVBMI: # %bb.0:
-; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]
-; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0
-; AVX512VLVBMI-NEXT: retq
+; AVX2OR512VL-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
+; AVX2OR512VL: # %bb.0:
+; AVX2OR512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX2OR512VL-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX2OR512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2OR512VL-NEXT: retq
 ;
 ; XOP-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
 ; XOP: # %bb.0:
@@ -1073,18 +1073,12 @@ define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v8i16_08196e7f:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i16_08196e7f:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,8,1,9,6,14,7,15]
-; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8i16_08196e7f:
+; AVX2OR512VL: # %bb.0:
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT: retq
 ;
 ; XOP-LABEL: shuffle_v8i16_08196e7f:
 ; XOP: # %bb.0:
@@ -1109,18 +1103,12 @@ define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v8i16_0c1d6879:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i16_0c1d6879:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,12,1,13,6,8,7,9]
-; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8i16_0c1d6879:
+; AVX2OR512VL: # %bb.0:
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT: retq
 ;
 ; XOP-LABEL: shuffle_v8i16_0c1d6879:
 ; XOP: # %bb.0:
@@ -1158,11 +1146,18 @@ define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
 ; AVX2-FAST-NEXT: retq
 ;
-; AVX512VL-LABEL: shuffle_v8i16_109832ba:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,9,8,3,2,11,10]
-; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_109832ba:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_109832ba:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,9,8,3,2,11,10]
+; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
+; AVX512VL-FAST-NEXT: retq
 ;
 ; XOP-LABEL: shuffle_v8i16_109832ba:
 ; XOP: # %bb.0:
@@ -1244,11 +1239,18 @@ define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
 ; AVX2-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX2-FAST-NEXT: retq
 ;
-; AVX512VL-LABEL: shuffle_v8i16_0213cedf:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2,1,3,12,14,13,15]
-; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_0213cedf:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_0213cedf:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2,1,3,12,14,13,15]
+; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
+; AVX512VL-FAST-NEXT: retq
 ;
 ; XOP-LABEL: shuffle_v8i16_0213cedf:
 ; XOP: # %bb.0:
@@ -1303,11 +1305,18 @@ define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
 ; AVX2-FAST-NEXT: retq
 ;
-; AVX512VL-LABEL: shuffle_v8i16_443aXXXX:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [4,4,3,10,4,5,6,7]
-; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_443aXXXX:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_443aXXXX:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [4,4,3,10,4,5,6,7]
+; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
+; AVX512VL-FAST-NEXT: retq
 ;
 ; XOP-LABEL: shuffle_v8i16_443aXXXX:
 ; XOP: # %bb.0:
@@ -1628,11 +1637,19 @@ define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
 ; AVX2-FAST-NEXT: retq
 ;
-; AVX512VL-LABEL: shuffle_v8i16_XXX1X579:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,4,5,7,9]
-; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXX1X579:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
+; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_XXX1X579:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,4,5,7,9]
+; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
+; AVX512VL-FAST-NEXT: retq
 ;
 ; XOP-LABEL: shuffle_v8i16_XXX1X579:
 ; XOP: # %bb.0:
@@ -2849,19 +2866,12 @@ define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v8i16_fu3ucc5u:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
-; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i16_fu3ucc5u:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [7,5,11,12,4,4,13,14]
-; AVX512VL-NEXT: vpermi2w %xmm0, %xmm1, %xmm2
-; AVX512VL-NEXT: vmovdqa %xmm2, %xmm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8i16_fu3ucc5u:
+; AVX2OR512VL: # %bb.0:
+; AVX2OR512VL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
+; AVX2OR512VL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX2OR512VL-NEXT: retq
 ;
 ; XOP-LABEL: shuffle_v8i16_fu3ucc5u:
 ; XOP: # %bb.0:
@@ -1374,17 +1374,11 @@ define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31
 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,31]
-; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
+; AVX2OR512VL: # %bb.0:
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
+; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX2OR512VL-NEXT: retq
 ;
 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
 ; XOPAVX1: # %bb.0:
@@ -1409,17 +1403,11 @@ define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15
 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
+; AVX2OR512VL: # %bb.0:
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
+; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX2OR512VL-NEXT: retq
 ;
 ; XOPAVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
 ; XOPAVX1: # %bb.0:
@@ -831,8 +831,9 @@ define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126
 ;
 ; AVX512VBMI-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
 ; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,5,7,9,11,13,15,65,67,69,71,73,75,77,79,17,19,21,23,25,27,29,31,81,83,85,87,89,91,93,95,33,35,37,39,41,43,45,47,97,99,101,103,105,107,109,111,49,51,53,55,57,59,61,63,113,115,117,119,121,123,125,127]
-; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0
+; AVX512VBMI-NEXT: vpsrlw $8, %zmm0, %zmm0
+; AVX512VBMI-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512VBMI-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
 ; AVX512VBMI-NEXT: retq
 %1 = lshr <32 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
 %2 = lshr <32 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -71,9 +71,9 @@ define <32 x i8> @foo(<48 x i8>* %x0) {
 ; AVX512BW-NEXT: kmovd %eax, %k1
 ; AVX512BW-NEXT: vmovdqu8 %ymm2, %ymm1 {%k1}
 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,1,2,4,5,7,8,10,11,13,14]
-; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = [0,1,2,3,4,5,6,7,8,9,10,27,28,29,30,31]
-; AVX512BW-NEXT: vpermi2w %ymm2, %ymm1, %ymm0
+; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX512BW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
+; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VBMI-LABEL: foo:
@@ -443,15 +443,16 @@ define <8 x i8> @interleaved_load_vf8_i8_stride4(<32 x i8>* %ptr) {
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
 ; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,3,2,33,32,35,34]
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [6,7,2,3,14,15,10,11,14,15,10,11,12,13,14,15]
-; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm4
-; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm3
-; AVX512-NEXT: vpermi2w %zmm4, %zmm3, %zmm2
-; AVX512-NEXT: vprold $8, %zmm4, %zmm4
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,2,3,14,15,10,11,14,15,10,11,12,13,14,15]
+; AVX512-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX512-NEXT: vpshuflw {{.*#+}} xmm4 = xmm3[1,0,3,2,4,5,6,7]
+; AVX512-NEXT: vpshufb %xmm2, %xmm0, %xmm2
+; AVX512-NEXT: vpshuflw {{.*#+}} xmm5 = xmm2[1,0,3,2,4,5,6,7]
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm5[0],xmm4[0]
 ; AVX512-NEXT: vprold $8, %zmm3, %zmm3
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
-; AVX512-NEXT: vpaddb %xmm2, %xmm3, %xmm2
+; AVX512-NEXT: vprold $8, %zmm2, %zmm2
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512-NEXT: vpaddb %xmm4, %xmm2, %xmm2
 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = <1,u,5,u,9,u,13,u,13,u,5,u,12,u,13,u>
 ; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0