From c3fdaa59e0ca311bbe98ccde578ae76b40f960d6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 25 Feb 2017 19:18:11 +0000 Subject: [PATCH] [AVX-512] Fix the execution domain on some instructions. llvm-svn: 296270 --- lib/Target/X86/X86InstrAVX512.td | 17 +++++++++++++---- test/CodeGen/X86/avx512-intrinsics.ll | 4 ++-- test/CodeGen/X86/avx512er-intrinsics.ll | 2 +- test/CodeGen/X86/avx512vl-intrinsics.ll | 8 ++++---- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 1de8e2b25d1..24ed183872c 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -6898,7 +6898,7 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd multiclass avx512_fp14_s opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { - let Predicates = [HasAVX512] in { + let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable_scalar, /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { + let ExeDomain = _.ExeDomain in { defm r: AVX512_maskable, EVEX, T8PD; @@ -6936,6 +6937,7 @@ multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, (OpNode (_.FloatVT (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, EVEX, T8PD, EVEX_B; + } } multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode> { @@ -6967,7 +6969,7 @@ defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>; /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode> { - + let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable_scalar opc, string OpcodeStr,X86VectorVTInfo _, (OpNode (_.VT _.RC:$src1), (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))), (i32 FROUND_CURRENT))>; + } } multiclass avx512_eri_s opc, string OpcodeStr, SDNode OpNode> { @@ -7005,7 +7008,7 @@ defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V; multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, SDNode OpNode> { - + let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable; @@ -7022,9 +7025,11 @@ multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, (OpNode (_.FloatVT (X86VBroadcast (_.ScalarLdFrag addr:$src))), (i32 FROUND_CURRENT))>, EVEX_B; + } } multiclass avx512_fp28_p_round opc, string OpcodeStr, X86VectorVTInfo _, SDNode OpNode> { + let ExeDomain = _.ExeDomain in defm rb : AVX512_maskable, multiclass avx512_sqrt_packed_round opc, string OpcodeStr, SDNode OpNodeRnd, X86VectorVTInfo _>{ + let ExeDomain = _.ExeDomain in defm rb: AVX512_maskable, @@ -7073,6 +7079,7 @@ multiclass avx512_sqrt_packed_round opc, string OpcodeStr, multiclass avx512_sqrt_packed opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ + let ExeDomain = _.ExeDomain in { defm r: AVX512_maskable, EVEX; @@ -7087,6 +7094,7 @@ multiclass avx512_sqrt_packed opc, string OpcodeStr, (OpNode (_.FloatVT (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, EVEX, EVEX_B; + } } multiclass avx512_sqrt_packed_all opc, string OpcodeStr, @@ -7124,7 +7132,7 @@ multiclass avx512_sqrt_packed_all_round opc, string OpcodeStr, multiclass avx512_sqrt_scalar opc, string OpcodeStr,X86VectorVTInfo _, string SUFF, SDNode OpNode, SDNode OpNodeRnd> { - + let ExeDomain = _.ExeDomain in { defm r_Int : AVX512_maskable_scalar opc, string OpcodeStr,X86VectorVTInfo _, (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>; } + } def : Pat<(_.EltVT (OpNode _.FRC:$src)), (!cast(NAME#SUFF#Zr) diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 4e95cffd4d0..bd60531fad7 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -223,7 +223,7 @@ define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; CHECK: ## BB#0: ; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovaps %xmm2, %xmm3 +; CHECK-NEXT: vmovapd %xmm2, %xmm3 ; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1} ; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z} @@ -3602,7 +3602,7 @@ define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x dou ; CHECK: ## BB#0: ; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovaps %xmm2, %xmm3 +; CHECK-NEXT: vmovapd %xmm2, %xmm3 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1} ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1} diff --git a/test/CodeGen/X86/avx512er-intrinsics.ll b/test/CodeGen/X86/avx512er-intrinsics.ll index 24fb056d9ed..ca130bd2b67 100644 --- a/test/CodeGen/X86/avx512er-intrinsics.ll +++ b/test/CodeGen/X86/avx512er-intrinsics.ll @@ -158,7 +158,7 @@ define <2 x double> @test_rsqrt28_sd_mask(<2 x double> %a0, <2 x double> %b0, <2 ; CHECK-NEXT: kxnorw %k0, %k0, %k0 # encoding: [0xc5,0xfc,0x46,0xc0] ; CHECK-NEXT: kshiftrw $15, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc8,0x0f] ; CHECK-NEXT: vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1] -; CHECK-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2] +; CHECK-NEXT: vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2] ; CHECK-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 7, i32 8) ; ret <2 x double> %res diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll index be3bc111c4c..17593f34733 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -4100,7 +4100,7 @@ define <4 x double> @test_rsqrt_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrsqrt14pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x4e,0xc8] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] +; CHECK-NEXT: vmovapd %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask) ret <4 x double> %res @@ -4130,7 +4130,7 @@ define <2 x double> @test_rsqrt_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrsqrt14pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x4e,0xc8] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] +; CHECK-NEXT: vmovapd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask) ret <2 x double> %res @@ -4163,7 +4163,7 @@ define <4 x double> @test_rcp_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrcp14pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x4c,0xc8] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] +; CHECK-NEXT: vmovapd %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask) ret <4 x double> %res @@ -4193,7 +4193,7 @@ define <2 x double> @test_rcp_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrcp14pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x4c,0xc8] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] +; CHECK-NEXT: vmovapd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask) ret <2 x double> %res