From 1a378ba17f77d93c78d3f55cf438ff536ce3c4d1 Mon Sep 17 00:00:00 2001 From: Igor Breger Date: Wed, 15 Jun 2016 07:30:38 +0000 Subject: [PATCH] [AVX512] Fix BLENDM lowering patterns. Operands should be swapped to match SELECT behavior. Use BLENDM instead of masked move instruction. Differential Revision: http://reviews.llvm.org/D21001 llvm-svn: 272763 --- lib/Target/X86/X86InstrAVX512.td | 15 +-- test/CodeGen/X86/avx512-bugfix-23634.ll | 2 +- test/CodeGen/X86/avx512-mov.ll | 16 ++-- test/CodeGen/X86/avx512-vec-cmp.ll | 111 ++++++++-------------- test/CodeGen/X86/avx512bw-mov.ll | 4 +- test/CodeGen/X86/avx512bw-vec-cmp.ll | 36 +++---- test/CodeGen/X86/avx512bwvl-mov.ll | 8 +- test/CodeGen/X86/avx512bwvl-vec-cmp.ll | 72 +++++--------- test/CodeGen/X86/avx512vl-mov.ll | 32 +++---- test/CodeGen/X86/avx512vl-vec-cmp.ll | 96 +++++++------------ test/CodeGen/X86/masked_gather_scatter.ll | 23 +++-- 11 files changed, 156 insertions(+), 259 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index fb00ff7f0ea..5bd9dc7a527 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1242,8 +1242,9 @@ multiclass avx512_blendmask opc, string OpcodeStr, X86VectorVTInfo _> { (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), - [(set _.RC:$dst, (X86select _.KRCWM:$mask, (_.VT _.RC:$src1), - (_.VT _.RC:$src2)))]>, EVEX_4V, EVEX_K; + [(set _.RC:$dst, (vselect _.KRCWM:$mask, + (_.VT _.RC:$src2), + (_.VT _.RC:$src1)))]>, EVEX_4V, EVEX_K; let hasSideEffects = 0 in def rrkz : AVX5128I opc, string OpcodeStr, X86VectorVTInfo _> { (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), - [(set _.RC:$dst, (X86select _.KRCWM:$mask, (_.VT _.RC:$src1), - (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>, + [(set _.RC:$dst, (vselect _.KRCWM:$mask, + (_.VT (bitconvert (_.LdFrag addr:$src2))), + (_.VT _.RC:$src1)))]>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>; let mayLoad = 1, hasSideEffects = 0 in def rmkz : AVX5128I opc, string OpcodeStr, X86VectorVTInfo _ !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), - [(set _.RC:$dst,(X86select _.KRCWM:$mask, (_.VT _.RC:$src1), - (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>, + [(set _.RC:$dst,(vselect _.KRCWM:$mask, + (X86VBroadcast (_.ScalarLdFrag addr:$src2)), + (_.VT _.RC:$src1)))]>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; let mayLoad = 1, hasSideEffects = 0 in diff --git a/test/CodeGen/X86/avx512-bugfix-23634.ll b/test/CodeGen/X86/avx512-bugfix-23634.ll index e66eefdb8e9..0dcfb7c169f 100644 --- a/test/CodeGen/X86/avx512-bugfix-23634.ll +++ b/test/CodeGen/X86/avx512-bugfix-23634.ll @@ -15,7 +15,7 @@ define void @f_fu(float* %ret, float* %aa, float %b) { ; CHECK-NEXT: vpsrad $1, %zmm2, %zmm2 ; CHECK-NEXT: movw $-21846, %ax ## imm = 0xAAAA ; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vmovdqa32 {{.*}}(%rip), %zmm1 {%k1} +; CHECK-NEXT: vpblendmd {{.*}}(%rip), %zmm1, %zmm1 {%k1} ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 diff --git a/test/CodeGen/X86/avx512-mov.ll b/test/CodeGen/X86/avx512-mov.ll index 2e2ad585c73..6b07e9e704d 100644 --- a/test/CodeGen/X86/avx512-mov.ll +++ b/test/CodeGen/X86/avx512-mov.ll @@ -313,7 +313,7 @@ define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04] -; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x07] +; CHECK-NEXT: vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* @@ -327,7 +327,7 @@ define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04] -; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x07] +; CHECK-NEXT: vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* @@ -369,7 +369,7 @@ define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04] -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x07] +; CHECK-NEXT: vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* @@ -383,7 +383,7 @@ define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04] -; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x07] +; CHECK-NEXT: vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* @@ -426,7 +426,7 @@ define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04] -; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x07] +; CHECK-NEXT: vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* @@ -441,7 +441,7 @@ define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04] -; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x07] +; CHECK-NEXT: vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* @@ -486,7 +486,7 @@ define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04] -; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x07] +; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* @@ -501,7 +501,7 @@ define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04] -; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x07] +; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll index 8f29c9d5c16..c9ec032ba37 100644 --- a/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -6,8 +6,7 @@ define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind { ; CHECK-LABEL: test1: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = fcmp ole <16 x float> %x, %y %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y @@ -18,8 +17,7 @@ define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind { ; CHECK-LABEL: test2: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = fcmp ole <8 x double> %x, %y %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y @@ -30,8 +28,7 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwin ; CHECK-LABEL: test3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x i32>, <16 x i32>* %yp, align 4 %mask = icmp eq <16 x i32> %x, %y @@ -43,8 +40,7 @@ define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) ; CHECK-LABEL: test4_unsigned: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp uge <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y @@ -55,8 +51,7 @@ define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: test5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp eq <8 x i64> %x, %y %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y @@ -67,8 +62,7 @@ define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) noun ; CHECK-LABEL: test6_unsigned: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp ugt <8 x i64> %x, %y %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y @@ -87,8 +81,7 @@ define <4 x float> @test7(<4 x float> %a, <4 x float> %b) { ; SKX: ## BB#0: ; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1 -; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq %mask = fcmp olt <4 x float> %a, zeroinitializer @@ -108,8 +101,7 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) { ; SKX: ## BB#0: ; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1 -; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq %mask = fcmp olt <2 x double> %a, zeroinitializer %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b @@ -126,8 +118,7 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind { ; SKX-LABEL: test9: ; SKX: ## BB#0: ; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 -; SKX-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %mask = icmp eq <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y @@ -144,8 +135,7 @@ define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind { ; SKX-LABEL: test10: ; SKX: ## BB#0: ; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1 -; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %mask = fcmp oeq <8 x float> %x, %y @@ -248,8 +238,7 @@ define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind ; CHECK-LABEL: test16: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1 -; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sge <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y @@ -260,8 +249,7 @@ define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou ; CHECK-LABEL: test17: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp sgt <16 x i32> %x, %y @@ -273,8 +261,7 @@ define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou ; CHECK-LABEL: test18: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp sle <16 x i32> %x, %y @@ -286,8 +273,7 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou ; CHECK-LABEL: test19: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp ule <16 x i32> %x, %y @@ -300,8 +286,7 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp eq <16 x i32> %x1, %y1 %mask0 = icmp eq <16 x i32> %x, %y @@ -315,8 +300,7 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i64> %x1, %y1 %mask0 = icmp sle <8 x i64> %x, %y @@ -330,8 +314,7 @@ define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i6 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sgt <8 x i64> %x1, %y1 %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4 @@ -346,8 +329,7 @@ define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1 ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <16 x i32> %x1, %y1 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 @@ -361,8 +343,7 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind { ; CHECK-LABEL: test24: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0 @@ -376,8 +357,7 @@ define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind ; CHECK-LABEL: test25: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0 @@ -392,8 +372,7 @@ define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1 ; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <16 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 @@ -410,8 +389,7 @@ define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1 ; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 @@ -481,8 +459,7 @@ define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind { ; SKX-LABEL: test30: ; SKX: ## BB#0: ; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 -; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %mask = fcmp oeq <4 x double> %x, %y @@ -500,8 +477,7 @@ define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp ; SKX-LABEL: test31: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1 -; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq %y = load <2 x double>, <2 x double>* %yp, align 4 @@ -520,8 +496,7 @@ define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp ; SKX-LABEL: test32: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1 -; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %y = load <4 x double>, <4 x double>* %yp, align 4 @@ -534,8 +509,7 @@ define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp ; CHECK-LABEL: test33: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x double>, <8 x double>* %yp, align 4 %mask = fcmp olt <8 x double> %x, %y @@ -553,8 +527,7 @@ define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) no ; SKX-LABEL: test34: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1 -; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq %y = load <4 x float>, <4 x float>* %yp, align 4 %mask = fcmp olt <4 x float> %x, %y @@ -573,8 +546,7 @@ define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) no ; SKX-LABEL: test35: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1 -; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %y = load <8 x float>, <8 x float>* %yp, align 4 @@ -587,8 +559,7 @@ define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp ; CHECK-LABEL: test36: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x float>, <16 x float>* %yp, align 4 %mask = fcmp olt <16 x float> %x, %y @@ -600,8 +571,7 @@ define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nou ; CHECK-LABEL: test37: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 -; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %a = load double, double* %ptr @@ -624,8 +594,7 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nou ; SKX-LABEL: test38: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1 -; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %a = load double, double* %ptr @@ -648,8 +617,7 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nou ; SKX-LABEL: test39: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1 -; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq %a = load double, double* %ptr @@ -666,8 +634,7 @@ define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) n ; CHECK-LABEL: test40: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %a = load float, float* %ptr @@ -690,8 +657,7 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) noun ; SKX-LABEL: test41: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1 -; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %a = load float, float* %ptr @@ -714,8 +680,7 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) noun ; SKX-LABEL: test42: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1 -; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq %a = load float, float* %ptr @@ -734,8 +699,7 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x ; KNL-NEXT: vpsllq $63, %zmm2, %zmm2 ; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1 ; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} -; KNL-NEXT: vmovapd %zmm0, %zmm1 {%k1} -; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: retq ; ; SKX-LABEL: test43: @@ -743,8 +707,7 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x ; SKX-NEXT: vpsllw $15, %xmm2, %xmm2 ; SKX-NEXT: vpmovw2m %xmm2, %k1 ; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} -; SKX-NEXT: vmovapd %zmm0, %zmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; SKX-NEXT: retq %a = load double, double* %ptr diff --git a/test/CodeGen/X86/avx512bw-mov.ll b/test/CodeGen/X86/avx512bw-mov.ll index e41a68c1ec3..747e4f3b2dc 100644 --- a/test/CodeGen/X86/avx512bw-mov.ll +++ b/test/CodeGen/X86/avx512bw-mov.ll @@ -26,7 +26,7 @@ define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ; CHECK-NEXT: vpcmpneqb %zmm2, %zmm1, %k1 -; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1} +; CHECK-NEXT: vpblendmb (%rdi), %zmm0, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp ne <64 x i8> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <64 x i8>* @@ -74,7 +74,7 @@ define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ; CHECK-NEXT: vpcmpneqw %zmm2, %zmm1, %k1 -; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} +; CHECK-NEXT: vpblendmw (%rdi), %zmm0, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp ne <32 x i16> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <32 x i16>* diff --git a/test/CodeGen/X86/avx512bw-vec-cmp.ll b/test/CodeGen/X86/avx512bw-vec-cmp.ll index 9e61c704443..016837e6130 100644 --- a/test/CodeGen/X86/avx512bw-vec-cmp.ll +++ b/test/CodeGen/X86/avx512bw-vec-cmp.ll @@ -5,8 +5,7 @@ define <64 x i8> @test1(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: test1: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp eq <64 x i8> %x, %y %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y @@ -17,8 +16,7 @@ define <64 x i8> @test2(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind { ; CHECK-LABEL: test2: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmb %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sgt <64 x i8> %x, %y %max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y @@ -29,8 +27,7 @@ define <32 x i16> @test3(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1) nounwind ; CHECK-LABEL: test3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew %zmm0, %zmm1, %k1 -; CHECK-NEXT: vmovdqu16 %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sge <32 x i16> %x, %y %max = select <32 x i1> %mask, <32 x i16> %x1, <32 x i16> %y @@ -41,8 +38,7 @@ define <64 x i8> @test4(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind { ; CHECK-LABEL: test4: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleub %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmb %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp ugt <64 x i8> %x, %y %max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y @@ -53,8 +49,7 @@ define <32 x i16> @test5(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %yp) nounwin ; CHECK-LABEL: test5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqw (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <32 x i16>, <32 x i16>* %yp, align 4 %mask = icmp eq <32 x i16> %x, %y @@ -66,8 +61,7 @@ define <32 x i16> @test6(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) noun ; CHECK-LABEL: test6: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtw (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4 %mask = icmp sgt <32 x i16> %x, %y @@ -79,8 +73,7 @@ define <32 x i16> @test7(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) noun ; CHECK-LABEL: test7: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4 %mask = icmp sle <32 x i16> %x, %y @@ -92,8 +85,7 @@ define <32 x i16> @test8(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) noun ; CHECK-LABEL: test8: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleuw (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4 %mask = icmp ule <32 x i16> %x, %y @@ -106,8 +98,7 @@ define <32 x i16> @test9(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1, <32 x i16 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpeqw %zmm3, %zmm2, %k1 {%k1} -; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp eq <32 x i16> %x1, %y1 %mask0 = icmp eq <32 x i16> %x, %y @@ -121,8 +112,7 @@ define <64 x i8> @test10(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1, <64 x i8> %y ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleb %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpleb %zmm2, %zmm3, %k1 {%k1} -; CHECK-NEXT: vmovdqu8 %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vpblendmb %zmm0, %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <64 x i8> %x1, %y1 %mask0 = icmp sle <64 x i8> %x, %y @@ -136,8 +126,7 @@ define <64 x i8> @test11(<64 x i8> %x, <64 x i8>* %y.ptr, <64 x i8> %x1, <64 x i ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpcmpgtb (%rdi), %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sgt <64 x i8> %x1, %y1 %y = load <64 x i8>, <64 x i8>* %y.ptr, align 4 @@ -152,8 +141,7 @@ define <32 x i16> @test12(<32 x i16> %x, <32 x i16>* %y.ptr, <32 x i16> %x1, <32 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew %zmm1, %zmm2, %k1 ; CHECK-NEXT: vpcmpleuw (%rdi), %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <32 x i16> %x1, %y1 %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4 diff --git a/test/CodeGen/X86/avx512bwvl-mov.ll b/test/CodeGen/X86/avx512bwvl-mov.ll index 64a678ed05c..6bd9c938405 100644 --- a/test/CodeGen/X86/avx512bwvl-mov.ll +++ b/test/CodeGen/X86/avx512bwvl-mov.ll @@ -26,7 +26,7 @@ define <32 x i8> @test_256_3(i8 * %addr, <32 x i8> %old, <32 x i8> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vpcmpneqb %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xca,0x04] -; CHECK-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x07] +; CHECK-NEXT: vpblendmb (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x66,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <32 x i8> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <32 x i8>* @@ -74,7 +74,7 @@ define <16 x i16> @test_256_7(i8 * %addr, <16 x i16> %old, <16 x i16> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vpcmpneqw %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x3f,0xca,0x04] -; CHECK-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x6f,0x07] +; CHECK-NEXT: vpblendmw (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x66,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <16 x i16> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i16>* @@ -122,7 +122,7 @@ define <16 x i8> @test_128_3(i8 * %addr, <16 x i8> %old, <16 x i8> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vpcmpneqb %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x3f,0xca,0x04] -; CHECK-NEXT: vmovdqu8 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x07] +; CHECK-NEXT: vpblendmb (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x66,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <16 x i8> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i8>* @@ -170,7 +170,7 @@ define <8 x i16> @test_128_7(i8 * %addr, <8 x i16> %old, <8 x i16> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vpcmpneqw %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x3f,0xca,0x04] -; CHECK-NEXT: vmovdqu16 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x6f,0x07] +; CHECK-NEXT: vpblendmw (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x66,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i16> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i16>* diff --git a/test/CodeGen/X86/avx512bwvl-vec-cmp.ll b/test/CodeGen/X86/avx512bwvl-vec-cmp.ll index c258ef394d8..17e581bbb50 100644 --- a/test/CodeGen/X86/avx512bwvl-vec-cmp.ll +++ b/test/CodeGen/X86/avx512bwvl-vec-cmp.ll @@ -5,8 +5,7 @@ define <32 x i8> @test256_1(<32 x i8> %x, <32 x i8> %y) nounwind { ; CHECK-LABEL: test256_1: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1 -; CHECK-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask = icmp eq <32 x i8> %x, %y %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %y @@ -17,8 +16,7 @@ define <32 x i8> @test256_2(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind ; CHECK-LABEL: test256_2: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k1 -; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1} -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sgt <32 x i8> %x, %y %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1 @@ -29,8 +27,7 @@ define <16 x i16> @test256_3(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1) nounw ; CHECK-LABEL: test256_3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k1 -; CHECK-NEXT: vmovdqu16 %ymm2, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %ymm2, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sge <16 x i16> %x, %y %max = select <16 x i1> %mask, <16 x i16> %x1, <16 x i16> %y @@ -41,8 +38,7 @@ define <32 x i8> @test256_4(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind ; CHECK-LABEL: test256_4: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k1 -; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1} -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %mask = icmp ugt <32 x i8> %x, %y %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1 @@ -53,8 +49,7 @@ define <16 x i16> @test256_5(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %yp) nou ; CHECK-LABEL: test256_5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqw (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x i16>, <16 x i16>* %yp, align 4 %mask = icmp eq <16 x i16> %x, %y @@ -66,8 +61,7 @@ define <16 x i16> @test256_6(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) ; CHECK-LABEL: test256_6: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtw (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4 %mask = icmp sgt <16 x i16> %x, %y @@ -79,8 +73,7 @@ define <16 x i16> @test256_7(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) ; CHECK-LABEL: test256_7: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4 %mask = icmp sle <16 x i16> %x, %y @@ -92,8 +85,7 @@ define <16 x i16> @test256_8(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) ; CHECK-LABEL: test256_8: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleuw (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4 %mask = icmp ule <16 x i16> %x, %y @@ -106,8 +98,7 @@ define <16 x i16> @test256_9(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1, <16 x ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpcmpeqw %ymm3, %ymm2, %k1 {%k1} -; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp eq <16 x i16> %x1, %y1 %mask0 = icmp eq <16 x i16> %x, %y @@ -121,8 +112,7 @@ define <32 x i8> @test256_10(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1, <32 x i8 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpcmpleb %ymm2, %ymm3, %k1 {%k1} -; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1} -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <32 x i8> %x1, %y1 %mask0 = icmp sle <32 x i8> %x, %y @@ -136,8 +126,7 @@ define <32 x i8> @test256_11(<32 x i8> %x, <32 x i8>* %y.ptr, <32 x i8> %x1, <32 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpcmpgtb (%rdi), %ymm0, %k1 {%k1} -; CHECK-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sgt <32 x i8> %x1, %y1 %y = load <32 x i8>, <32 x i8>* %y.ptr, align 4 @@ -152,8 +141,7 @@ define <16 x i16> @test256_12(<16 x i16> %x, <16 x i16>* %y.ptr, <16 x i16> %x1, ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew %ymm1, %ymm2, %k1 ; CHECK-NEXT: vpcmpleuw (%rdi), %ymm0, %k1 {%k1} -; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <16 x i16> %x1, %y1 %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4 @@ -167,8 +155,7 @@ define <16 x i8> @test128_1(<16 x i8> %x, <16 x i8> %y) nounwind { ; CHECK-LABEL: test128_1: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1 -; CHECK-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp eq <16 x i8> %x, %y %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %y @@ -179,8 +166,7 @@ define <16 x i8> @test128_2(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind ; CHECK-LABEL: test128_2: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k1 -; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sgt <16 x i8> %x, %y %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1 @@ -191,8 +177,7 @@ define <8 x i16> @test128_3(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1) nounwind ; CHECK-LABEL: test128_3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k1 -; CHECK-NEXT: vmovdqu16 %xmm2, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %xmm2, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sge <8 x i16> %x, %y %max = select <8 x i1> %mask, <8 x i16> %x1, <8 x i16> %y @@ -203,8 +188,7 @@ define <16 x i8> @test128_4(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind ; CHECK-LABEL: test128_4: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 -; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp ugt <16 x i8> %x, %y %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1 @@ -215,8 +199,7 @@ define <8 x i16> @test128_5(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %yp) nounwin ; CHECK-LABEL: test128_5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i16>, <8 x i16>* %yp, align 4 %mask = icmp eq <8 x i16> %x, %y @@ -228,8 +211,7 @@ define <8 x i16> @test128_6(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) noun ; CHECK-LABEL: test128_6: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4 %mask = icmp sgt <8 x i16> %x, %y @@ -241,8 +223,7 @@ define <8 x i16> @test128_7(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) noun ; CHECK-LABEL: test128_7: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4 %mask = icmp sle <8 x i16> %x, %y @@ -254,8 +235,7 @@ define <8 x i16> @test128_8(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) noun ; CHECK-LABEL: test128_8: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleuw (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4 %mask = icmp ule <8 x i16> %x, %y @@ -268,8 +248,7 @@ define <8 x i16> @test128_9(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1, <8 x i16> ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpcmpeqw %xmm3, %xmm2, %k1 {%k1} -; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp eq <8 x i16> %x1, %y1 %mask0 = icmp eq <8 x i16> %x, %y @@ -283,8 +262,7 @@ define <16 x i8> @test128_10(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1, <16 x i8 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpcmpleb %xmm2, %xmm3, %k1 {%k1} -; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <16 x i8> %x1, %y1 %mask0 = icmp sle <16 x i8> %x, %y @@ -298,8 +276,7 @@ define <16 x i8> @test128_11(<16 x i8> %x, <16 x i8>* %y.ptr, <16 x i8> %x1, <16 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpcmpgtb (%rdi), %xmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sgt <16 x i8> %x1, %y1 %y = load <16 x i8>, <16 x i8>* %y.ptr, align 4 @@ -314,8 +291,7 @@ define <8 x i16> @test128_12(<8 x i16> %x, <8 x i16>* %y.ptr, <8 x i16> %x1, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew %xmm1, %xmm2, %k1 ; CHECK-NEXT: vpcmpleuw (%rdi), %xmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i16> %x1, %y1 %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4 diff --git a/test/CodeGen/X86/avx512vl-mov.ll b/test/CodeGen/X86/avx512vl-mov.ll index aab104d7402..0838fb5c043 100644 --- a/test/CodeGen/X86/avx512vl-mov.ll +++ b/test/CodeGen/X86/avx512vl-mov.ll @@ -166,7 +166,7 @@ define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04] -; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07] +; CHECK-NEXT: vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i32>* @@ -180,7 +180,7 @@ define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04] -; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x07] +; CHECK-NEXT: vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i32>* @@ -222,7 +222,7 @@ define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] -; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07] +; CHECK-NEXT: vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i64>* @@ -236,7 +236,7 @@ define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] -; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x07] +; CHECK-NEXT: vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i64>* @@ -279,7 +279,7 @@ define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1 ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04] -; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07] +; CHECK-NEXT: vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x float>* @@ -294,7 +294,7 @@ define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1 ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04] -; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07] +; CHECK-NEXT: vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x float>* @@ -338,7 +338,7 @@ define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1 ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] -; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07] +; CHECK-NEXT: vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x double>* @@ -352,7 +352,7 @@ define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1 ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] -; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07] +; CHECK-NEXT: vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x double>* @@ -554,7 +554,7 @@ define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07] +; CHECK-NEXT: vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i32>* @@ -568,7 +568,7 @@ define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x07] +; CHECK-NEXT: vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i32>* @@ -610,7 +610,7 @@ define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07] +; CHECK-NEXT: vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x i64>* @@ -624,7 +624,7 @@ define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x07] +; CHECK-NEXT: vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x i64>* @@ -666,7 +666,7 @@ define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07] +; CHECK-NEXT: vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x float>* @@ -680,7 +680,7 @@ define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07] +; CHECK-NEXT: vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x float>* @@ -722,7 +722,7 @@ define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1 ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07] +; CHECK-NEXT: vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x double>* @@ -736,7 +736,7 @@ define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1 ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07] +; CHECK-NEXT: vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x double>* diff --git a/test/CodeGen/X86/avx512vl-vec-cmp.ll b/test/CodeGen/X86/avx512vl-vec-cmp.ll index 293a27d2fac..62c8a26d1e6 100644 --- a/test/CodeGen/X86/avx512vl-vec-cmp.ll +++ b/test/CodeGen/X86/avx512vl-vec-cmp.ll @@ -5,8 +5,7 @@ define <4 x i64> @test256_1(<4 x i64> %x, <4 x i64> %y) nounwind { ; CHECK-LABEL: test256_1: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask = icmp eq <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y @@ -17,8 +16,7 @@ define <4 x i64> @test256_2(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind ; CHECK-LABEL: test256_2: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k1 -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %ymm2, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sgt <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y @@ -29,8 +27,7 @@ define <8 x i32> @test256_3(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1) nounwind ; CHECK-LABEL: test256_3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k1 -; CHECK-NEXT: vmovdqa32 %ymm2, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %ymm2, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sge <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x1, <8 x i32> %y @@ -41,8 +38,7 @@ define <4 x i64> @test256_4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind ; CHECK-LABEL: test256_4: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k1 -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %ymm2, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask = icmp ugt <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y @@ -53,8 +49,7 @@ define <8 x i32> @test256_5(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwin ; CHECK-LABEL: test256_5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp eq <8 x i32> %x, %y @@ -66,8 +61,7 @@ define <8 x i32> @test256_6(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun ; CHECK-LABEL: test256_6: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp sgt <8 x i32> %x, %y @@ -79,8 +73,7 @@ define <8 x i32> @test256_7(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun ; CHECK-LABEL: test256_7: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp sle <8 x i32> %x, %y @@ -92,8 +85,7 @@ define <8 x i32> @test256_8(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun ; CHECK-LABEL: test256_8: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp ule <8 x i32> %x, %y @@ -106,8 +98,7 @@ define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32> ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp eq <8 x i32> %x1, %y1 %mask0 = icmp eq <8 x i32> %x, %y @@ -121,8 +112,7 @@ define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpcmpleq %ymm2, %ymm3, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm2 {%k1} -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <4 x i64> %x1, %y1 %mask0 = icmp sle <4 x i64> %x, %y @@ -136,8 +126,7 @@ define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sgt <4 x i64> %x1, %y1 %y = load <4 x i64>, <4 x i64>* %y.ptr, align 4 @@ -152,8 +141,7 @@ define <8 x i32> @test256_12(<8 x i32> %x, <8 x i32>* %y.ptr, <8 x i32> %x1, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1 ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i32> %x1, %y1 %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 @@ -167,8 +155,7 @@ define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind ; CHECK-LABEL: test256_13: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0 @@ -182,8 +169,7 @@ define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind ; CHECK-LABEL: test256_14: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi){1to8}, %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0 @@ -198,8 +184,7 @@ define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1 ; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 @@ -216,8 +201,7 @@ define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %ymm1, %ymm2, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <4 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 @@ -233,8 +217,7 @@ define <2 x i64> @test128_1(<2 x i64> %x, <2 x i64> %y) nounwind { ; CHECK-LABEL: test128_1: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp eq <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y @@ -245,8 +228,7 @@ define <2 x i64> @test128_2(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind ; CHECK-LABEL: test128_2: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k1 -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %xmm2, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sgt <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y @@ -257,8 +239,7 @@ define <4 x i32> @test128_3(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1) nounwind ; CHECK-LABEL: test128_3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k1 -; CHECK-NEXT: vmovdqa32 %xmm2, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %xmm2, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sge <4 x i32> %x, %y %max = select <4 x i1> %mask, <4 x i32> %x1, <4 x i32> %y @@ -269,8 +250,7 @@ define <2 x i64> @test128_4(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind ; CHECK-LABEL: test128_4: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1 -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %xmm2, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp ugt <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y @@ -281,8 +261,7 @@ define <4 x i32> @test128_5(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwin ; CHECK-LABEL: test128_5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %yp, align 4 %mask = icmp eq <4 x i32> %x, %y @@ -294,8 +273,7 @@ define <4 x i32> @test128_6(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun ; CHECK-LABEL: test128_6: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sgt <4 x i32> %x, %y @@ -307,8 +285,7 @@ define <4 x i32> @test128_7(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun ; CHECK-LABEL: test128_7: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sle <4 x i32> %x, %y @@ -320,8 +297,7 @@ define <4 x i32> @test128_8(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun ; CHECK-LABEL: test128_8: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ule <4 x i32> %x, %y @@ -334,8 +310,7 @@ define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32> ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp eq <4 x i32> %x1, %y1 %mask0 = icmp eq <4 x i32> %x, %y @@ -349,8 +324,7 @@ define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpcmpleq %xmm2, %xmm3, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vpblendmq %xmm0, %xmm2, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <2 x i64> %x1, %y1 %mask0 = icmp sle <2 x i64> %x, %y @@ -364,8 +338,7 @@ define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sgt <2 x i64> %x1, %y1 %y = load <2 x i64>, <2 x i64>* %y.ptr, align 4 @@ -380,8 +353,7 @@ define <4 x i32> @test128_12(<4 x i32> %x, <4 x i32>* %y.ptr, <4 x i32> %x1, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1 ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <4 x i32> %x1, %y1 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 @@ -395,8 +367,7 @@ define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind ; CHECK-LABEL: test128_13: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k1 -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0 @@ -410,8 +381,7 @@ define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind ; CHECK-LABEL: test128_14: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi){1to4}, %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0 @@ -426,8 +396,7 @@ define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1 ; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <4 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 @@ -444,8 +413,7 @@ define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %xmm1, %xmm2, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <2 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll index 6ebb2185d03..416ccdc68c7 100644 --- a/test/CodeGen/X86/masked_gather_scatter.ll +++ b/test/CodeGen/X86/masked_gather_scatter.ll @@ -1490,8 +1490,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x ; SKX-NEXT: vmovq %xmm1, %rax ; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0 ; SKX-NEXT: .LBB29_6: # %else5 -; SKX-NEXT: vmovdqa32 %xmm0, %xmm3 {%k1} -; SKX-NEXT: vmovaps %zmm3, %zmm0 +; SKX-NEXT: vpblendmd %xmm0, %xmm3, %xmm0 {%k1} ; SKX-NEXT: retq ; ; SKX_32-LABEL: test30: @@ -1503,33 +1502,33 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x ; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1 ; SKX_32-NEXT: kmovb %k1, {{[0-9]+}}(%esp) ; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1 -; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2 +; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1 ; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al -; SKX_32-NEXT: # implicit-def: %XMM1 +; SKX_32-NEXT: # implicit-def: %XMM0 ; SKX_32-NEXT: testb %al, %al ; SKX_32-NEXT: je .LBB29_2 ; SKX_32-NEXT: # BB#1: # %cond.load -; SKX_32-NEXT: vmovd %xmm2, %eax -; SKX_32-NEXT: vmovd (%eax), %xmm1 +; SKX_32-NEXT: vmovd %xmm1, %eax +; SKX_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SKX_32-NEXT: .LBB29_2: # %else ; SKX_32-NEXT: kmovb %k1, {{[0-9]+}}(%esp) ; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al ; SKX_32-NEXT: testb %al, %al ; SKX_32-NEXT: je .LBB29_4 ; SKX_32-NEXT: # BB#3: # %cond.load1 -; SKX_32-NEXT: vpextrd $1, %xmm2, %eax -; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1 +; SKX_32-NEXT: vpextrd $1, %xmm1, %eax +; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0 ; SKX_32-NEXT: .LBB29_4: # %else2 -; SKX_32-NEXT: vmovdqa32 {{[0-9]+}}(%esp), %xmm0 +; SKX_32-NEXT: vmovdqa32 {{[0-9]+}}(%esp), %xmm2 ; SKX_32-NEXT: kmovb %k1, (%esp) ; SKX_32-NEXT: movb (%esp), %al ; SKX_32-NEXT: testb %al, %al ; SKX_32-NEXT: je .LBB29_6 ; SKX_32-NEXT: # BB#5: # %cond.load4 -; SKX_32-NEXT: vpextrd $2, %xmm2, %eax -; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1 +; SKX_32-NEXT: vpextrd $2, %xmm1, %eax +; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0 ; SKX_32-NEXT: .LBB29_6: # %else5 -; SKX_32-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} +; SKX_32-NEXT: vpblendmd %xmm0, %xmm2, %xmm0 {%k1} ; SKX_32-NEXT: addl $12, %esp ; SKX_32-NEXT: retl