From 32daaef0aa6c9a0c49deb3523091b04c9923ec92 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Sun, 19 Jul 2015 10:17:33 +0000 Subject: [PATCH] AVX-512: Floating point conversions for SKX - DAG Lowering. SKX supports conversion for all FP types. Integer types include doublewords and quadwords. I added "Legal" status for these nodes and a bunch of tests. I added "NoVLX" for AVX DAG selection to force VLX instruction selection when VLX is supported. Differential Revision: http://reviews.llvm.org/D11255 llvm-svn: 242637 --- lib/Target/X86/X86ISelLowering.cpp | 29 +++++++- lib/Target/X86/X86InstrSSE.td | 16 +++-- test/CodeGen/X86/avx512-cvt.ll | 105 +++++++++++++++++++++++++++-- 3 files changed, 138 insertions(+), 12 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c43e5fb62fa..78b35010be6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1352,8 +1352,33 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); if (Subtarget->hasDQI()) { - setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom); + + setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); + if (Subtarget->hasVLX()) { + setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal); + setOperationAction(ISD::FP_TO_SINT, 
MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + } + } + if (Subtarget->hasVLX()) { + setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); } setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 4e1d524e463..90696408963 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2073,15 +2073,17 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; let Predicates = [HasAVX] in { - def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (VCVTDQ2PSrr VR128:$src)>; - def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))), - (VCVTDQ2PSrm addr:$src)>; - def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), (VCVTDQ2PSrr VR128:$src)>; def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))), (VCVTDQ2PSrm addr:$src)>; +} + +let Predicates = [HasAVX, NoVLX] in { + def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), + (VCVTDQ2PSrr VR128:$src)>; + def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))), + (VCVTDQ2PSrm addr:$src)>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (VCVTTPS2DQrr VR128:$src)>; @@ -2149,7 +2151,7 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>; -let Predicates = [HasAVX] in { +let 
Predicates = [HasAVX, NoVLX] in { def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), (VCVTTPD2DQYrr VR256:$src)>; def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))), @@ -2306,7 +2308,9 @@ let Predicates = [HasAVX] in { (VCVTDQ2PSYrr VR256:$src)>; def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))), (VCVTDQ2PSYrm addr:$src)>; +} +let Predicates = [HasAVX, NoVLX] in { // Match fround and fextend for 128/256-bit conversions def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))), (VCVTPD2PSrr VR128:$src)>; diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index a211bcd38c9..aa34076c910 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s ; CHECK-LABEL: sitof32 ; CHECK: vcvtdq2ps %zmm @@ -8,6 +8,70 @@ define <16 x float> @sitof32(<16 x i32> %a) nounwind { ret <16 x float> %b } +; CHECK-LABEL: sltof864 +; CHECK: vcvtqq2pd +define <8 x double> @sltof864(<8 x i64> %a) { + %b = sitofp <8 x i64> %a to <8 x double> + ret <8 x double> %b +} + +; CHECK-LABEL: sltof464 +; CHECK: vcvtqq2pd +define <4 x double> @sltof464(<4 x i64> %a) { + %b = sitofp <4 x i64> %a to <4 x double> + ret <4 x double> %b +} + +; CHECK-LABEL: sltof2f32 +; CHECK: vcvtqq2ps +define <2 x float> @sltof2f32(<2 x i64> %a) { + %b = sitofp <2 x i64> %a to <2 x float> + ret <2 x float>%b +} + +; CHECK-LABEL: sltof4f32_mem +; CHECK: vcvtqq2psy (%rdi) +define <4 x float> @sltof4f32_mem(<4 x i64>* %a) { + %a1 = load <4 x i64>, <4 x i64>* %a, align 8 + %b = sitofp <4 x i64> %a1 to <4 x float> + ret <4 x float>%b +} + +; CHECK-LABEL: f64tosl +; CHECK: vcvttpd2qq +define <4 x i64> @f64tosl(<4 x double> %a) { + %b = fptosi <4 x double> %a to <4 x i64> + ret <4 x i64> %b +} + +; CHECK-LABEL: f32tosl +; CHECK: vcvttps2qq +define <4 x i64> @f32tosl(<4 x 
float> %a) { + %b = fptosi <4 x float> %a to <4 x i64> + ret <4 x i64> %b +} + +; CHECK-LABEL: sltof432 +; CHECK: vcvtqq2ps +define <4 x float> @sltof432(<4 x i64> %a) { + %b = sitofp <4 x i64> %a to <4 x float> + ret <4 x float> %b +} + +; CHECK-LABEL: ultof432 +; CHECK: vcvtuqq2ps +define <4 x float> @ultof432(<4 x i64> %a) { + %b = uitofp <4 x i64> %a to <4 x float> + ret <4 x float> %b +} + +; CHECK-LABEL: ultof64 +; CHECK: vcvtuqq2pd +define <8 x double> @ultof64(<8 x i64> %a) { + %b = uitofp <8 x i64> %a to <8 x double> + ret <8 x double> %b +} + ; CHECK-LABEL: fptosi00 ; CHECK: vcvttps2dq %zmm ; CHECK: ret @@ -64,16 +128,39 @@ define <8 x i32> @fptosi01(<8 x double> %a) { ret <8 x i32> %b } +; CHECK-LABEL: fptosi03 +; CHECK: vcvttpd2dq %ymm +; CHECK: ret +define <4 x i32> @fptosi03(<4 x double> %a) { + %b = fptosi <4 x double> %a to <4 x i32> + ret <4 x i32> %b +} + ; CHECK-LABEL: fptrunc00 ; CHECK: vcvtpd2ps %zmm ; CHECK-NEXT: vcvtpd2ps %zmm -; CHECK-NEXT: vinsertf64x4 $1 +; CHECK-NEXT: vinsertf ; CHECK: ret define <16 x float> @fptrunc00(<16 x double> %b) nounwind { %a = fptrunc <16 x double> %b to <16 x float> ret <16 x float> %a } +; CHECK-LABEL: fptrunc01 +; CHECK: vcvtpd2ps %ymm +define <4 x float> @fptrunc01(<4 x double> %b) { + %a = fptrunc <4 x double> %b to <4 x float> + ret <4 x float> %a +} + +; CHECK-LABEL: fptrunc02 +; CHECK: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} +define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) { + %a = fptrunc <4 x double> %b to <4 x float> + %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer + ret <4 x float> %c +} + ; CHECK-LABEL: fpext00 ; CHECK: vcvtps2pd %ymm0, %zmm0 ; CHECK: ret @@ -82,6 +169,16 @@ define <8 x double> @fpext00(<8 x float> %b) nounwind { ret <8 x double> %a } +; CHECK-LABEL: fpext01 +; CHECK: vcvtps2pd %xmm0, %ymm0 {%k1} {z} +; CHECK: ret +define <4 x double> @fpext01(<4 x float> %b, <4 x double>%b1, <4 x double>%a1) { + %a = fpext <4 x float> %b to <4 x double> + %mask = fcmp 
ogt <4 x double>%a1, %b1 + %c = select <4 x i1>%mask, <4 x double>%a, <4 x double>zeroinitializer + ret <4 x double> %c +} + ; CHECK-LABEL: funcA ; CHECK: vcvtsi2sdq (%rdi){{.*}} encoding: [0x62 ; CHECK: ret @@ -257,7 +354,7 @@ define double @uitofp03(i32 %a) nounwind { } ; CHECK-LABEL: @sitofp_16i1_float -; CHECK: vpbroadcastd +; CHECK: vpmovm2d ; CHECK: vcvtdq2ps define <16 x float> @sitofp_16i1_float(<16 x i32> %a) { %mask = icmp slt <16 x i32> %a, zeroinitializer @@ -301,7 +398,7 @@ define <8 x double> @sitofp_8i8_double(<8 x i8> %a) { ; CHECK-LABEL: @sitofp_8i1_double -; CHECK: vpbroadcastq +; CHECK: vpmovm2d ; CHECK: vcvtdq2pd define <8 x double> @sitofp_8i1_double(<8 x double> %a) { %cmpres = fcmp ogt <8 x double> %a, zeroinitializer