
[X86] Remove isel patterns for using unmasked vmovdqa32/vmovdqu32 for integer vector loads.

These patterns only matched a vXi64 load bitcast to vXi32, and there is no advantage to using vmovdqa32 over vmovdqa64 for an unmasked load.

llvm-svn: 322819
Craig Topper 2018-01-18 07:44:06 +00:00
parent b50de8f5ac
commit 5585e2235d
17 changed files with 116 additions and 115 deletions
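For illustration: element size only matters when a write-mask selects elements, so an unmasked vmovdqa32 and vmovdqa64 load the same bits. A minimal hypothetical example (not one of the tests below; the function name and CHECK line are illustrative, patterned on the updated tests):

define <16 x i32> @plain_load_v16i32(<16 x i32>* %p) {
; CHECK: vmovdqa64 (%rdi), %zmm0   ; previously selected as vmovdqa32
  %v = load <16 x i32>, <16 x i32>* %p, align 64
  ret <16 x i32> %v
}

Masked loads are unaffected and still select vmovdqa32/vmovdqu32, since there the mask operates on 32-bit elements.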

View File

@@ -3192,19 +3192,20 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _,
Predicate prd> {
Predicate prd,
bit NoRMPattern = 0> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info512,
_.info512.AlignedLdFrag, masked_load_aligned512>,
EVEX_V512;
_.info512.AlignedLdFrag, masked_load_aligned512,
NoRMPattern>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info256,
_.info256.AlignedLdFrag, masked_load_aligned256>,
EVEX_V256;
_.info256.AlignedLdFrag, masked_load_aligned256,
NoRMPattern>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info128,
_.info128.AlignedLdFrag, masked_load_aligned128>,
EVEX_V128;
_.info128.AlignedLdFrag, masked_load_aligned128,
NoRMPattern>, EVEX_V128;
}
}
@@ -3324,7 +3325,7 @@ defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
HasAVX512>,
HasAVX512, 1>,
avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
HasAVX512, "VMOVDQA32">,
PD, EVEX_CD8<32, CD8VF>;
@@ -3346,7 +3347,7 @@ defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 1>
XD, VEX_W, EVEX_CD8<16, CD8VF>;
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
0, null_frag>,
1, null_frag>,
avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
HasAVX512, "VMOVDQU32">,
XS, EVEX_CD8<32, CD8VF>;
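For context, the NoRMPattern bit threaded through above is consumed inside the avx512_load multiclass, where it suppresses the unmasked load-from-memory isel pattern while keeping the instruction definition itself. A rough sketch of the memory form (paraphrased, not the verbatim source):

  let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],  // no unmasked isel pattern when set
                        [(set _.RC:$dst,
                          (_.VT (bitconvert (ld_frag addr:$src))))]),
                    _.ExeDomain, itins.rm>, EVEX;

With NoRMPattern set on VMOVDQA32/VMOVDQU32, unmasked vXi32 loads fall through to the VMOVDQA64/VMOVDQU64 patterns, which is what the test updates below reflect.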

View File

@@ -427,7 +427,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8*, <16 x i32>, i16)
define <16 x i32> @test_mask_load_unaligned_d(i8* %ptr, i8* %ptr2, <16 x i32> %data, i16 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0
; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vmovdqu32 (%rsi), %zmm0 {%k1}
; CHECK-NEXT: vmovdqu32 (%rdi), %zmm1 {%k1} {z}
@@ -463,7 +463,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8*, <16 x i32>, i16)
define <16 x i32> @test_mask_load_aligned_d(<16 x i32> %data, i8* %ptr, i16 %mask) {
; CHECK-LABEL: test_mask_load_aligned_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1}
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm1 {%k1} {z}

View File

@@ -1300,7 +1300,7 @@ define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) {
define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
@@ -1309,7 +1309,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %ve
;
; SKX-LABEL: test_masked_16xi32_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
@@ -1324,7 +1324,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %ve
define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
@@ -1332,7 +1332,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %
;
; SKX-LABEL: test_masked_z_16xi32_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
@@ -1345,7 +1345,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %
define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
@@ -1354,7 +1354,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %ve
;
; SKX-LABEL: test_masked_16xi32_perm_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
@@ -1369,7 +1369,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %ve
define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
@@ -1377,7 +1377,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %
;
; SKX-LABEL: test_masked_z_16xi32_perm_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
@@ -1390,7 +1390,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %
define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
@@ -1399,7 +1399,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %ve
;
; SKX-LABEL: test_masked_16xi32_perm_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
@@ -1414,7 +1414,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %ve
define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
@@ -1422,7 +1422,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %
;
; SKX-LABEL: test_masked_z_16xi32_perm_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
@@ -1450,7 +1450,7 @@ define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) {
define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
@@ -1459,7 +1459,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %ve
;
; SKX-LABEL: test_masked_16xi32_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
@@ -1474,7 +1474,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %ve
define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
@@ -1482,7 +1482,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %
;
; SKX-LABEL: test_masked_z_16xi32_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
@@ -1511,7 +1511,7 @@ define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) {
define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
@@ -1519,7 +1519,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32>
;
; SKX-LABEL: test_masked_16xi32_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
@@ -1534,7 +1534,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32>
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
@@ -1542,7 +1542,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i3
;
; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
@@ -1557,7 +1557,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i3
define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
@@ -1565,7 +1565,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32>
;
; SKX-LABEL: test_masked_16xi32_perm_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
@@ -1580,7 +1580,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32>
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
@@ -1588,7 +1588,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i3
;
; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
@@ -1603,7 +1603,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i3
define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
@@ -1611,7 +1611,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32>
;
; SKX-LABEL: test_masked_16xi32_perm_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
@@ -1626,7 +1626,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32>
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
@@ -1634,7 +1634,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i3
;
; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
@@ -1665,7 +1665,7 @@ define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) {
define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
@@ -1673,7 +1673,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32>
;
; SKX-LABEL: test_masked_16xi32_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
@@ -1688,7 +1688,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32>
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa32 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [4:0.50]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [4:0.50]
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
@@ -1696,7 +1696,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i3
;
; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50]
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]

View File

@@ -595,7 +595,7 @@ define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) {
; CHECK-LABEL: test_2xi32_to_16xi32_mem:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@@ -606,7 +606,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp, <16 x i
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpcmpeqd %zmm4, %zmm1, %k1
; CHECK-NEXT: vpermd %zmm2, %zmm3, %zmm0 {%k1}
@@ -622,7 +622,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp, <16 x
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm0, %k1
; CHECK-NEXT: vpermd %zmm1, %zmm2, %zmm0 {%k1} {z}
@@ -637,7 +637,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp, <16 x i
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpcmpeqd %zmm4, %zmm1, %k1
; CHECK-NEXT: vpermd %zmm2, %zmm3, %zmm0 {%k1}
@@ -653,7 +653,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp, <16 x
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm0, %k1
; CHECK-NEXT: vpermd %zmm1, %zmm2, %zmm0 {%k1} {z}
@@ -668,7 +668,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp, <16 x i
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpcmpeqd %zmm4, %zmm1, %k1
; CHECK-NEXT: vpermd %zmm2, %zmm3, %zmm0 {%k1}
@@ -684,7 +684,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp, <16 x
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm0, %k1
; CHECK-NEXT: vpermd %zmm1, %zmm2, %zmm0 {%k1} {z}
@@ -699,7 +699,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask3(<2 x i32>* %vp, <16 x i
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpcmpeqd %zmm4, %zmm1, %k1
; CHECK-NEXT: vpermd %zmm2, %zmm3, %zmm0 {%k1}
@@ -715,7 +715,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask3(<2 x i32>* %vp, <16 x
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm0, %k1
; CHECK-NEXT: vpermd %zmm1, %zmm2, %zmm0 {%k1} {z}

View File

@@ -1674,7 +1674,7 @@ define <8 x i32> @test_masked_z_16xi32_to_8xi32_perm_mem_mask0(<16 x i32>* %vp,
define <8 x i32> @test_masked_16xi32_to_8xi32_perm_mem_mask1(<16 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_to_8xi32_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [7,3,6,11,0,1,5,15]
; CHECK-NEXT: vpermi2d %ymm2, %ymm3, %ymm4
@@ -1692,7 +1692,7 @@ define <8 x i32> @test_masked_16xi32_to_8xi32_perm_mem_mask1(<16 x i32>* %vp, <8
define <8 x i32> @test_masked_z_16xi32_to_8xi32_perm_mem_mask1(<16 x i32>* %vp, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_to_8xi32_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [7,3,6,11,0,1,5,15]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
@@ -1710,7 +1710,7 @@ define <8 x i32> @test_masked_z_16xi32_to_8xi32_perm_mem_mask1(<16 x i32>* %vp,
define <8 x i32> @test_masked_16xi32_to_8xi32_perm_mem_mask2(<16 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_to_8xi32_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [4,14,1,5,4,2,8,10]
; CHECK-NEXT: vpermi2d %ymm2, %ymm3, %ymm4
@@ -1728,7 +1728,7 @@ define <8 x i32> @test_masked_16xi32_to_8xi32_perm_mem_mask2(<16 x i32>* %vp, <8
define <8 x i32> @test_masked_z_16xi32_to_8xi32_perm_mem_mask2(<16 x i32>* %vp, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_to_8xi32_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [4,14,1,5,4,2,8,10]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
@@ -1746,7 +1746,7 @@ define <8 x i32> @test_masked_z_16xi32_to_8xi32_perm_mem_mask2(<16 x i32>* %vp,
define <8 x i32> @test_16xi32_to_8xi32_perm_mem_mask3(<16 x i32>* %vp) {
; CHECK-LABEL: test_16xi32_to_8xi32_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm1
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [8,4,1,13,15,4,6,12]
; CHECK-NEXT: vpermi2d %ymm2, %ymm1, %ymm0
@@ -1758,7 +1758,7 @@ define <8 x i32> @test_16xi32_to_8xi32_perm_mem_mask3(<16 x i32>* %vp) {
define <8 x i32> @test_masked_16xi32_to_8xi32_perm_mem_mask3(<16 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_to_8xi32_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [8,4,1,13,15,4,6,12]
; CHECK-NEXT: vpermi2d %ymm3, %ymm2, %ymm4
@@ -1776,7 +1776,7 @@ define <8 x i32> @test_masked_16xi32_to_8xi32_perm_mem_mask3(<16 x i32>* %vp, <8
define <8 x i32> @test_masked_z_16xi32_to_8xi32_perm_mem_mask3(<16 x i32>* %vp, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_to_8xi32_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [8,4,1,13,15,4,6,12]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
@@ -1794,7 +1794,7 @@ define <4 x i32> @test_16xi32_to_4xi32_perm_mem_mask0(<16 x i32>* %vp) {
define <4 x i32> @test_16xi32_to_4xi32_perm_mem_mask0(<16 x i32>* %vp) {
; CHECK-LABEL: test_16xi32_to_4xi32_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm1
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = <13,0,0,6,u,u,u,u>
; CHECK-NEXT: vpermi2d %ymm2, %ymm1, %ymm0
@@ -1808,7 +1808,7 @@ define <4 x i32> @test_16xi32_to_4xi32_perm_mem_mask0(<16 x i32>* %vp) {
define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mem_mask0(<16 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_to_4xi32_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = <13,0,0,6,u,u,u,u>
; CHECK-NEXT: vpermi2d %ymm3, %ymm2, %ymm4
@@ -1827,7 +1827,7 @@ define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mem_mask0(<16 x i32>* %vp, <4
define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask0(<16 x i32>* %vp, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_to_4xi32_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm1
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <13,0,0,6,u,u,u,u>
; CHECK-NEXT: vpermi2d %ymm2, %ymm1, %ymm3
@@ -1846,7 +1846,7 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask0(<16 x i32>* %vp,
define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mem_mask1(<16 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_to_4xi32_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [15,5,3,2,15,5,7,6]
; CHECK-NEXT: vpermi2d %ymm2, %ymm3, %ymm4
@@ -1865,7 +1865,7 @@ define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mem_mask1(<16 x i32>* %vp, <4
define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask1(<16 x i32>* %vp, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_to_4xi32_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm1
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [15,5,3,2,15,5,7,6]
; CHECK-NEXT: vpermi2d %ymm1, %ymm2, %ymm3
@@ -1884,7 +1884,7 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask1(<16 x i32>* %vp,
define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mem_mask2(<16 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_to_4xi32_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = <2,15,6,9,u,u,u,u>
; CHECK-NEXT: vpermi2d %ymm3, %ymm2, %ymm4
@@ -1903,7 +1903,7 @@ define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mem_mask2(<16 x i32>* %vp, <4
define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask2(<16 x i32>* %vp, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_to_4xi32_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm1
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <2,15,6,9,u,u,u,u>
; CHECK-NEXT: vpermi2d %ymm2, %ymm1, %ymm3
@@ -1922,7 +1922,7 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask2(<16 x i32>* %vp,
define <4 x i32> @test_16xi32_to_4xi32_perm_mem_mask3(<16 x i32>* %vp) {
; CHECK-LABEL: test_16xi32_to_4xi32_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
@@ -1940,7 +1940,7 @@ define <4 x i32> @test_16xi32_to_4xi32_perm_mem_mask3(<16 x i32>* %vp) {
define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mem_mask3(<16 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_to_4xi32_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vmovd %xmm2, %eax
; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
; CHECK-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
@@ -1964,7 +1964,7 @@ define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mem_mask3(<16 x i32>* %vp, <4
define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask3(<16 x i32>* %vp, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_to_4xi32_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm1
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT: vmovd %xmm1, %eax
; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]

View File

@@ -823,7 +823,7 @@ define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec) {
define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpcmpeqd %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1}
@@ -838,7 +838,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %ve
define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7]
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z}
@@ -851,7 +851,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %
define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpcmpeqd %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1}
@@ -866,7 +866,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %ve
define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3]
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z}
@@ -879,7 +879,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %
define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpcmpeqd %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1}
@@ -894,7 +894,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %ve
define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5]
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z}
@@ -916,7 +916,7 @@ define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) {
define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpcmpeqd %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1}
@@ -931,7 +931,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %ve
define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12]
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z}
@@ -954,7 +954,7 @@ define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) {
define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6]
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1}
@@ -969,7 +969,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32>
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6]
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z}
@@ -984,7 +984,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i3
define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3]
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1}
@@ -999,7 +999,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32>
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3]
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z}
@@ -1014,7 +1014,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i3
define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2]
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1}
@@ -1029,7 +1029,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32>
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2]
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z}
@@ -1054,7 +1054,7 @@ define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) {
define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1]
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1}
@@ -1069,7 +1069,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32>
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1]
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1]
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z}

View File

@@ -216,7 +216,7 @@ define <16 x i32> @vpternlog_v16i32_012_load0_mask(<16 x i32>* %x0ptr, <16 x i32
; CHECK-LABEL: vpternlog_v16i32_012_load0_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
@@ -317,7 +317,7 @@ define <16 x i32> @vpternlog_v16i32_102_load1_mask(<16 x i32> %x0, <16 x i32>* %
; CHECK-LABEL: vpternlog_v16i32_102_load1_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
@@ -366,7 +366,7 @@ define <16 x i32> @vpternlog_v16i32_210_load2_mask(<16 x i32> %x0, <16 x i32> %x
; CHECK-LABEL: vpternlog_v16i32_210_load2_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
@@ -379,7 +379,7 @@ define <16 x i32> @vpternlog_v16i32_021_load0_mask(<16 x i32>* %x0ptr, <16 x i32
; CHECK-LABEL: vpternlog_v16i32_021_load0_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq

View File

@@ -123,7 +123,7 @@ define void @shuffle_v16i32_to_v8i32_1(<16 x i32>* %L, <8 x i32>* %S) nounwind {
;
; AVX512VL-LABEL: shuffle_v16i32_to_v8i32_1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa32 (%rdi), %zmm0
; AVX512VL-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15]
; AVX512VL-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
@@ -143,7 +143,7 @@ define void @shuffle_v16i32_to_v8i32_1(<16 x i32>* %L, <8 x i32>* %S) nounwind {
;
; AVX512BWVL-LABEL: shuffle_v16i32_to_v8i32_1:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa32 (%rdi), %zmm0
; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15]
; AVX512BWVL-NEXT: vpermi2d %ymm1, %ymm0, %ymm2

View File

@@ -162,7 +162,7 @@ define void @shuffle_v32i16_to_v16i16(<32 x i16>* %L, <16 x i16>* %S) nounwind {
define void @trunc_v16i32_to_v16i16(<32 x i16>* %L, <16 x i16>* %S) nounwind {
; AVX512-LABEL: trunc_v16i32_to_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa32 (%rdi), %zmm0
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: vpmovdw %zmm0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -186,7 +186,7 @@ define void @shuffle_v16i32_to_v8i32(<16 x i32>* %L, <8 x i32>* %S) nounwind {
;
; AVX512VL-LABEL: shuffle_v16i32_to_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa32 (%rdi), %zmm0
; AVX512VL-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14]
; AVX512VL-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
@@ -206,7 +206,7 @@ define void @shuffle_v16i32_to_v8i32(<16 x i32>* %L, <8 x i32>* %S) nounwind {
;
; AVX512BWVL-LABEL: shuffle_v16i32_to_v8i32:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa32 (%rdi), %zmm0
; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14]
; AVX512BWVL-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
@@ -318,7 +318,7 @@ define void @shuffle_v64i8_to_v16i8(<64 x i8>* %L, <16 x i8>* %S) nounwind {
define void @trunc_v16i32_to_v16i8(<64 x i8>* %L, <16 x i8>* %S) nounwind {
; AVX512-LABEL: trunc_v16i32_to_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa32 (%rdi), %zmm0
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: vpmovdb %zmm0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq

View File

@@ -89,7 +89,7 @@ define <16 x i32> @test_div7_16i32(<16 x i32> %a) nounwind {
; AVX-NEXT: vpshufd {{.*#+}} zmm1 = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; AVX-NEXT: vpshufd {{.*#+}} zmm3 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; AVX-NEXT: vpmuldq %zmm1, %zmm3, %zmm1
; AVX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31]
; AVX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31]
; AVX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; AVX-NEXT: vpaddd %zmm0, %zmm3, %zmm0
; AVX-NEXT: vpsrld $31, %zmm0, %zmm1
@@ -316,7 +316,7 @@ define <16 x i32> @test_rem7_16i32(<16 x i32> %a) nounwind {
; AVX-NEXT: vpshufd {{.*#+}} zmm1 = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; AVX-NEXT: vpshufd {{.*#+}} zmm3 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; AVX-NEXT: vpmuldq %zmm1, %zmm3, %zmm1
; AVX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31]
; AVX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31]
; AVX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; AVX-NEXT: vpaddd %zmm0, %zmm3, %zmm1
; AVX-NEXT: vpsrld $31, %zmm1, %zmm2

View File

@@ -97,7 +97,7 @@ define <16 x i32> @test_div7_16i32(<16 x i32> %a) nounwind {
; AVX-NEXT: vpshufd {{.*#+}} zmm1 = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; AVX-NEXT: vpshufd {{.*#+}} zmm3 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; AVX-NEXT: vpmuludq %zmm1, %zmm3, %zmm1
; AVX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31]
; AVX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31]
; AVX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; AVX-NEXT: vpsubd %zmm3, %zmm0, %zmm0
; AVX-NEXT: vpsrld $1, %zmm0, %zmm0
@@ -327,7 +327,7 @@ define <16 x i32> @test_rem7_16i32(<16 x i32> %a) nounwind {
; AVX-NEXT: vpshufd {{.*#+}} zmm1 = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; AVX-NEXT: vpshufd {{.*#+}} zmm3 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; AVX-NEXT: vpmuludq %zmm1, %zmm3, %zmm1
; AVX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31]
; AVX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31]
; AVX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; AVX-NEXT: vpsubd %zmm3, %zmm0, %zmm1
; AVX-NEXT: vpsrld $1, %zmm1, %zmm1

View File

@@ -333,7 +333,7 @@ define <32 x i16> @constant_rotate_v32i16(<32 x i16> %a) nounwind {
; AVX512F-NEXT: vpmullw %ymm2, %ymm1, %ymm3
; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm4 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm4 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
; AVX512F-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
@@ -349,7 +349,7 @@ define <32 x i16> @constant_rotate_v32i16(<32 x i16> %a) nounwind {
; AVX512VL-NEXT: vpmullw %ymm2, %ymm1, %ymm3
; AVX512VL-NEXT: vpmullw %ymm2, %ymm0, %ymm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vmovdqa32 {{.*#+}} zmm4 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero

View File

@@ -290,7 +290,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT: vpsravd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1

View File

@@ -222,7 +222,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero

View File

@@ -197,7 +197,7 @@ define <16 x i32> @shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_z
define <16 x i32> @shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28(<16 x i32> %a, <16 x i32> %b) {
; AVX512F-LABEL: shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = [1,2,3,16,5,6,7,20,9,10,11,24,13,14,15,28]
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,2,3,16,5,6,7,20,9,10,11,24,13,14,15,28]
; AVX512F-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
@@ -232,7 +232,7 @@ define <16 x i32> @shuffle_v16i32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16
define <16 x i32> @shuffle_v16i32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL: # %bb.0:
; ALL-NEXT: vmovdqa32 {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
; ALL-NEXT: retq
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
@@ -263,7 +263,7 @@ define <16 x float> @shuffle_v16f32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_
define <16 x i32> @shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x i32> %a, <16 x i32>* %b) {
; ALL-LABEL: shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL: # %bb.0:
; ALL-NEXT: vmovdqa32 {{.*#+}} zmm1 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT: vpermt2d (%rdi), %zmm1, %zmm0
; ALL-NEXT: retq
%c = load <16 x i32>, <16 x i32>* %b

View File

@@ -512,18 +512,18 @@ define <16 x i32> @combine_vpermt2var_16i32_identity_mask(<16 x i32> %x0, <16 x
; X32-LABEL: combine_vpermt2var_16i32_identity_mask:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vmovdqa32 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X32-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 {%k1} {z}
; X32-NEXT: vmovdqa32 {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
; X32-NEXT: vpermi2d %zmm2, %zmm2, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16i32_identity_mask:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vmovdqa32 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 {%k1} {z}
; X64-NEXT: vmovdqa32 {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
; X64-NEXT: vpermi2d %zmm2, %zmm2, %zmm0 {%k1} {z}
; X64-NEXT: retq
%res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> %x0, <16 x i32> %x1, i16 %m)
@@ -1045,13 +1045,13 @@ define <8 x double> @combine_vpermi2var_vpermt2var_8f64_as_vperm2(<8 x double> %
define <16 x i32> @combine_vpermi2var_vpermt2var_16i32_as_vpermd(<16 x i32> %x0, <16 x i32> %x1) {
; X32-LABEL: combine_vpermi2var_vpermt2var_16i32_as_vpermd:
; X32: # %bb.0:
; X32-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19]
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19]
; X32-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_vpermt2var_16i32_as_vpermd:
; X64: # %bb.0:
; X64-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19]
; X64-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19]
; X64-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
; X64-NEXT: retq
%res0 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> <i32 0, i32 31, i32 2, i32 29, i32 4, i32 27, i32 6, i32 25, i32 8, i32 23, i32 10, i32 21, i32 12, i32 19, i32 14, i32 17>, <16 x i32> %x1, i16 -1)

View File

@@ -171,7 +171,7 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <1
; AVX512F-NEXT: vpcmpeqd %zmm3, %zmm1, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
@@ -185,7 +185,7 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <1
; AVX512VL-NEXT: vpcmpeqd %zmm3, %zmm1, %k2
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512VL-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; AVX512VL-NEXT: vptestmd %zmm2, %zmm2, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
@@ -199,7 +199,7 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <1
; VL_BW_DQ-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
; VL_BW_DQ-NEXT: vpmovm2d %k1, %zmm0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %zmm1
; VL_BW_DQ-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; VL_BW_DQ-NEXT: vpmovd2m %zmm2, %k0
; VL_BW_DQ-NEXT: vpmovm2b %k0, %xmm0