mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[SelectionDAG] Improve support for promotion of <1 x fX> floating point argument types (PR31088)
PR31088 demonstrated that we were assuming only integer types require promotion from <1 x iX> types, when in fact floating-point types may require it as well (in this case, half floats). This patch adds support for extension/truncation of both integer and floating-point types.
Differential Revision: https://reviews.llvm.org/D32391
llvm-svn: 301910
This commit is contained in:
parent
a02834c8c0
commit
9d13cd153b
@ -688,6 +688,10 @@ public:
|
|||||||
/// Example: shuffle A, B, <0,5,2,7> -> shuffle B, A, <4,1,6,3>
|
/// Example: shuffle A, B, <0,5,2,7> -> shuffle B, A, <4,1,6,3>
|
||||||
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV);
|
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV);
|
||||||
|
|
||||||
|
/// Convert Op, which must be of float type, to the
|
||||||
|
/// float type VT, by either extending or rounding (by truncation).
|
||||||
|
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT);
|
||||||
|
|
||||||
/// Convert Op, which must be of integer type, to the
|
/// Convert Op, which must be of integer type, to the
|
||||||
/// integer type VT, by either any-extending or truncating it.
|
/// integer type VT, by either any-extending or truncating it.
|
||||||
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
|
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
|
||||||
|
@ -529,11 +529,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
|
|||||||
EVT VT = N->getValueType(0);
|
EVT VT = N->getValueType(0);
|
||||||
SDValue Res = GetScalarizedVector(N->getOperand(0));
|
SDValue Res = GetScalarizedVector(N->getOperand(0));
|
||||||
if (Res.getValueType() != VT)
|
if (Res.getValueType() != VT)
|
||||||
Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
|
Res = VT.isFloatingPoint()
|
||||||
|
? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res)
|
||||||
|
: DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
|
||||||
return Res;
|
return Res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// If the input condition is a vector that needs to be scalarized, it must be
|
/// If the input condition is a vector that needs to be scalarized, it must be
|
||||||
/// <1 x i1>, so just convert to a normal ISD::SELECT
|
/// <1 x i1>, so just convert to a normal ISD::SELECT
|
||||||
/// (still with vector output type since that was acceptable if we got here).
|
/// (still with vector output type since that was acceptable if we got here).
|
||||||
|
@ -959,6 +959,12 @@ void SelectionDAG::clear() {
|
|||||||
DbgInfo->clear();
|
DbgInfo->clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
|
||||||
|
return VT.bitsGT(Op.getValueType())
|
||||||
|
? getNode(ISD::FP_EXTEND, DL, VT, Op)
|
||||||
|
: getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
|
||||||
|
}
|
||||||
|
|
||||||
SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
|
SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
|
||||||
return VT.bitsGT(Op.getValueType()) ?
|
return VT.bitsGT(Op.getValueType()) ?
|
||||||
getNode(ISD::ANY_EXTEND, DL, VT, Op) :
|
getNode(ISD::ANY_EXTEND, DL, VT, Op) :
|
||||||
|
@ -350,7 +350,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
|
|||||||
|
|
||||||
EVT ValueSVT = ValueVT.getVectorElementType();
|
EVT ValueSVT = ValueVT.getVectorElementType();
|
||||||
if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
|
if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
|
||||||
Val = DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
|
Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
|
||||||
|
: DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
|
||||||
|
|
||||||
return DAG.getBuildVector(ValueVT, DL, Val);
|
return DAG.getBuildVector(ValueVT, DL, Val);
|
||||||
}
|
}
|
||||||
@ -543,10 +544,9 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
|
|||||||
Val = DAG.getNode(
|
Val = DAG.getNode(
|
||||||
ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
|
ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
|
||||||
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
|
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
|
||||||
|
|
||||||
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
|
||||||
Parts[0] = Val;
|
Parts[0] = Val;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -36,6 +36,21 @@ define half @test_fadd(half %a, half %b) #0 {
|
|||||||
ret half %r
|
ret half %r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_fadd_v1f16(
|
||||||
|
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_v1f16_param_0];
|
||||||
|
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_v1f16_param_1];
|
||||||
|
; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
|
||||||
|
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
|
||||||
|
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
|
||||||
|
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
|
||||||
|
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
|
||||||
|
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
|
||||||
|
%r = fadd <1 x half> %a, %b
|
||||||
|
ret <1 x half> %r
|
||||||
|
}
|
||||||
|
|
||||||
; Check that we can lower fadd with immediate arguments.
|
; Check that we can lower fadd with immediate arguments.
|
||||||
; CHECK-LABEL: test_fadd_imm_0(
|
; CHECK-LABEL: test_fadd_imm_0(
|
||||||
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_0_param_0];
|
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_0_param_0];
|
||||||
|
@ -3,6 +3,63 @@
|
|||||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
|
||||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c | FileCheck %s --check-prefix=F16C
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c | FileCheck %s --check-prefix=F16C
|
||||||
|
|
||||||
|
define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind {
|
||||||
|
; X86-LABEL: ir_fadd_v1f16:
|
||||||
|
; X86: # BB#0:
|
||||||
|
; X86-NEXT: subl $28, %esp
|
||||||
|
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||||
|
; X86-NEXT: movss %xmm0, (%esp)
|
||||||
|
; X86-NEXT: calll __gnu_f2h_ieee
|
||||||
|
; X86-NEXT: movzwl %ax, %eax
|
||||||
|
; X86-NEXT: movl %eax, (%esp)
|
||||||
|
; X86-NEXT: calll __gnu_h2f_ieee
|
||||||
|
; X86-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
|
||||||
|
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||||
|
; X86-NEXT: movss %xmm0, (%esp)
|
||||||
|
; X86-NEXT: calll __gnu_f2h_ieee
|
||||||
|
; X86-NEXT: movzwl %ax, %eax
|
||||||
|
; X86-NEXT: movl %eax, (%esp)
|
||||||
|
; X86-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
|
||||||
|
; X86-NEXT: fstps {{[0-9]+}}(%esp)
|
||||||
|
; X86-NEXT: calll __gnu_h2f_ieee
|
||||||
|
; X86-NEXT: fstps {{[0-9]+}}(%esp)
|
||||||
|
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||||
|
; X86-NEXT: addss {{[0-9]+}}(%esp), %xmm0
|
||||||
|
; X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
|
||||||
|
; X86-NEXT: flds {{[0-9]+}}(%esp)
|
||||||
|
; X86-NEXT: addl $28, %esp
|
||||||
|
; X86-NEXT: retl
|
||||||
|
;
|
||||||
|
; X64-LABEL: ir_fadd_v1f16:
|
||||||
|
; X64: # BB#0:
|
||||||
|
; X64-NEXT: pushq %rax
|
||||||
|
; X64-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
|
||||||
|
; X64-NEXT: movaps %xmm1, %xmm0
|
||||||
|
; X64-NEXT: callq __gnu_f2h_ieee
|
||||||
|
; X64-NEXT: movzwl %ax, %edi
|
||||||
|
; X64-NEXT: callq __gnu_h2f_ieee
|
||||||
|
; X64-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
|
||||||
|
; X64-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
|
||||||
|
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
|
||||||
|
; X64-NEXT: callq __gnu_f2h_ieee
|
||||||
|
; X64-NEXT: movzwl %ax, %edi
|
||||||
|
; X64-NEXT: callq __gnu_h2f_ieee
|
||||||
|
; X64-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload
|
||||||
|
; X64-NEXT: popq %rax
|
||||||
|
; X64-NEXT: retq
|
||||||
|
;
|
||||||
|
; F16C-LABEL: ir_fadd_v1f16:
|
||||||
|
; F16C: # BB#0:
|
||||||
|
; F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1
|
||||||
|
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
|
||||||
|
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||||
|
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||||
|
; F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0
|
||||||
|
; F16C-NEXT: retq
|
||||||
|
%retval = fadd <1 x half> %arg0, %arg1
|
||||||
|
ret <1 x half> %retval
|
||||||
|
}
|
||||||
|
|
||||||
define <2 x half> @ir_fadd_v2f16(<2 x half> %arg0, <2 x half> %arg1) nounwind {
|
define <2 x half> @ir_fadd_v2f16(<2 x half> %arg0, <2 x half> %arg1) nounwind {
|
||||||
; X86-LABEL: ir_fadd_v2f16:
|
; X86-LABEL: ir_fadd_v2f16:
|
||||||
; X86: # BB#0:
|
; X86: # BB#0:
|
||||||
|
Loading…
Reference in New Issue
Block a user