mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Add support for FP_ROUND from v2f64 to v2f32
- Due to the current matching-vector-elements constraint in ISD::FP_ROUND, rounding from v2f64 to v4f32 (after legalization from v2f32) is scalarized. Add a customized v2f32 widening that converts it into a target-specific X86ISD::VFPROUND to work around this constraint. llvm-svn: 165631
This commit is contained in:
parent
c434bfd7e4
commit
6a09ff62ba
@ -940,6 +940,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
|
||||
|
||||
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
|
||||
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
|
||||
}
|
||||
@ -11468,6 +11469,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
||||
}
|
||||
return;
|
||||
}
|
||||
case ISD::FP_ROUND: {
|
||||
SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
|
||||
Results.push_back(V);
|
||||
return;
|
||||
}
|
||||
case ISD::READCYCLECOUNTER: {
|
||||
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
|
||||
SDValue TheChain = N->getOperand(0);
|
||||
@ -11662,6 +11668,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
|
||||
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
|
||||
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
|
||||
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
|
||||
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
|
||||
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
|
||||
case X86ISD::VSHL: return "X86ISD::VSHL";
|
||||
|
@ -233,6 +233,9 @@ namespace llvm {
|
||||
// VFPEXT - Vector FP extend.
|
||||
VFPEXT,
|
||||
|
||||
// VFPROUND - Vector FP round.
|
||||
VFPROUND,
|
||||
|
||||
// VSHLDQ, VSRLDQ - 128-bit vector logical left / right shift
|
||||
VSHLDQ, VSRLDQ,
|
||||
|
||||
|
@ -93,6 +93,9 @@ def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
|
||||
def X86vfpext : SDNode<"X86ISD::VFPEXT",
|
||||
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
|
||||
SDTCisFP<0>, SDTCisFP<1>]>>;
|
||||
def X86vfpround: SDNode<"X86ISD::VFPROUND",
|
||||
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
|
||||
SDTCisFP<0>, SDTCisFP<1>]>>;
|
||||
|
||||
def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
|
||||
def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
|
||||
|
@ -2125,6 +2125,10 @@ let Predicates = [HasAVX] in {
|
||||
(VCVTDQ2PSYrm addr:$src)>;
|
||||
|
||||
// Match fround and fextend for 128/256-bit conversions
|
||||
def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
|
||||
(VCVTPD2PSrr VR128:$src)>;
|
||||
def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
|
||||
(VCVTPD2PSXrm addr:$src)>;
|
||||
def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
|
||||
(VCVTPD2PSYrr VR256:$src)>;
|
||||
def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
|
||||
@ -2139,7 +2143,12 @@ let Predicates = [HasAVX] in {
|
||||
}
|
||||
|
||||
let Predicates = [UseSSE2] in {
|
||||
// Match fextend for 128 conversions
|
||||
// Match fround and fextend for 128-bit conversions
|
||||
def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
|
||||
(CVTPD2PSrr VR128:$src)>;
|
||||
def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
|
||||
(CVTPD2PSrm addr:$src)>;
|
||||
|
||||
def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
|
||||
(CVTPS2PDrr VR128:$src)>;
|
||||
}
|
||||
|
61
test/CodeGen/X86/fp-load-trunc.ll
Normal file
61
test/CodeGen/X86/fp-load-trunc.ll
Normal file
@ -0,0 +1,61 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s
|
||||
; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s --check-prefix=AVX
|
||||
|
||||
define <1 x float> @test1(<1 x double>* %p) nounwind {
|
||||
; CHECK: test1
|
||||
; CHECK: cvtsd2ss
|
||||
; CHECK: ret
|
||||
; AVX: test1
|
||||
; AVX: vcvtsd2ss
|
||||
; AVX: ret
|
||||
%x = load <1 x double>* %p
|
||||
%y = fptrunc <1 x double> %x to <1 x float>
|
||||
ret <1 x float> %y
|
||||
}
|
||||
|
||||
define <2 x float> @test2(<2 x double>* %p) nounwind {
|
||||
; CHECK: test2
|
||||
; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
|
||||
; CHECK: ret
|
||||
; AVX: test2
|
||||
; AVX: vcvtpd2psx {{[0-9]*}}(%{{.*}})
|
||||
; AVX: ret
|
||||
%x = load <2 x double>* %p
|
||||
%y = fptrunc <2 x double> %x to <2 x float>
|
||||
ret <2 x float> %y
|
||||
}
|
||||
|
||||
define <4 x float> @test3(<4 x double>* %p) nounwind {
|
||||
; CHECK: test3
|
||||
; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
|
||||
; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
|
||||
; CHECK: movlhps
|
||||
; CHECK: ret
|
||||
; AVX: test3
|
||||
; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
|
||||
; AVX: ret
|
||||
%x = load <4 x double>* %p
|
||||
%y = fptrunc <4 x double> %x to <4 x float>
|
||||
ret <4 x float> %y
|
||||
}
|
||||
|
||||
define <8 x float> @test4(<8 x double>* %p) nounwind {
|
||||
; CHECK: test4
|
||||
; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
|
||||
; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
|
||||
; CHECK: movlhps
|
||||
; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
|
||||
; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
|
||||
; CHECK: movlhps
|
||||
; CHECK: ret
|
||||
; AVX: test4
|
||||
; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
|
||||
; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
|
||||
; AVX: vinsertf128
|
||||
; AVX: ret
|
||||
%x = load <8 x double>* %p
|
||||
%y = fptrunc <8 x double> %x to <8 x float>
|
||||
ret <8 x float> %y
|
||||
}
|
||||
|
||||
|
@ -1,33 +1,56 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s
|
||||
; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s --check-prefix=AVX
|
||||
|
||||
define <1 x float> @test1(<1 x double> %x) nounwind {
|
||||
; CHECK: test1
|
||||
; CHECK: cvtsd2ss
|
||||
; CHECK: ret
|
||||
; AVX: test1
|
||||
; AVX: vcvtsd2ss
|
||||
; AVX: ret
|
||||
%y = fptrunc <1 x double> %x to <1 x float>
|
||||
ret <1 x float> %y
|
||||
}
|
||||
|
||||
|
||||
define <2 x float> @test2(<2 x double> %x) nounwind {
|
||||
; FIXME: It would be nice if this compiled down to a cvtpd2ps
|
||||
; CHECK: cvtsd2ss
|
||||
; CHECK: cvtsd2ss
|
||||
; CHECK: test2
|
||||
; CHECK: cvtpd2ps
|
||||
; CHECK: ret
|
||||
; AVX: test2
|
||||
; AVX-NOT: vcvtpd2psy
|
||||
; AVX: vcvtpd2ps
|
||||
; AVX: ret
|
||||
%y = fptrunc <2 x double> %x to <2 x float>
|
||||
ret <2 x float> %y
|
||||
}
|
||||
|
||||
define <8 x float> @test3(<8 x double> %x) nounwind {
|
||||
; FIXME: It would be nice if this compiled down to a series of cvtpd2ps
|
||||
; CHECK: cvtsd2ss
|
||||
; CHECK: cvtsd2ss
|
||||
; CHECK: cvtsd2ss
|
||||
; CHECK: cvtsd2ss
|
||||
; CHECK: cvtsd2ss
|
||||
; CHECK: cvtsd2ss
|
||||
; CHECK: cvtsd2ss
|
||||
; CHECK: cvtsd2ss
|
||||
define <4 x float> @test3(<4 x double> %x) nounwind {
|
||||
; CHECK: test3
|
||||
; CHECK: cvtpd2ps
|
||||
; CHECK: cvtpd2ps
|
||||
; CHECK: movlhps
|
||||
; CHECK: ret
|
||||
; AVX: test3
|
||||
; AVX: vcvtpd2psy
|
||||
; AVX: ret
|
||||
%y = fptrunc <4 x double> %x to <4 x float>
|
||||
ret <4 x float> %y
|
||||
}
|
||||
|
||||
define <8 x float> @test4(<8 x double> %x) nounwind {
|
||||
; CHECK: test4
|
||||
; CHECK: cvtpd2ps
|
||||
; CHECK: cvtpd2ps
|
||||
; CHECK: movlhps
|
||||
; CHECK: cvtpd2ps
|
||||
; CHECK: cvtpd2ps
|
||||
; CHECK: movlhps
|
||||
; CHECK: ret
|
||||
; AVX: test4
|
||||
; AVX: vcvtpd2psy
|
||||
; AVX: vcvtpd2psy
|
||||
; AVX: vinsertf128
|
||||
; AVX: ret
|
||||
%y = fptrunc <8 x double> %x to <8 x float>
|
||||
ret <8 x float> %y
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user