1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[SelectionDAG] Don't scalarize vector fpround sources that don't need it.

This is similar to the workaround code in ScalarizeVecRes_UnaryOp,
ScalarizeVecRes_SETCC, ScalarizeVecRes_VSELECT, etc.

Consider a case like this:

```
define <1 x half> @func(<1 x float> %x) {
  %tmp = fptrunc <1 x float> %x to <1 x half>
  ret <1 x half> %tmp
}
```

On AArch64, the <1 x float> source type is legal, so it does not need
scalarizing, and calling GetScalarizedVector on it will crash.

Differential Revision: https://reviews.llvm.org/D98208
This commit is contained in:
Jessica Paquette 2021-03-08 11:32:58 -08:00
parent 0965edc9f1
commit 5d505e26ca
2 changed files with 42 additions and 4 deletions

View File

@ -318,10 +318,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
NewVT, Op, N->getOperand(1));
SDLoc DL(N);
SDValue Op = N->getOperand(0);
EVT OpVT = Op.getValueType();
// The result needs scalarizing, but it's not a given that the source does.
// See similar logic in ScalarizeVecRes_UnaryOp.
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Op = GetScalarizedVector(Op);
} else {
EVT VT = OpVT.getVectorElementType();
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
DAG.getVectorIdxConstant(0, DL));
}
return DAG.getNode(ISD::FP_ROUND, DL,
N->getValueType(0).getVectorElementType(), Op,
N->getOperand(1));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {

View File

@ -199,6 +199,33 @@ define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind readnone ssp {
ret <2 x float> %vcvt1.i
}
define half @test_vcvt_f16_f32(<1 x float> %x) {
; GENERIC-LABEL: test_vcvt_f16_f32:
; GENERIC: // %bb.0:
; GENERIC-NEXT: // kill: def $d0 killed $d0 def $q0
; GENERIC-NEXT: fcvt h0, s0
; GENERIC-NEXT: ret
;
; FAST-LABEL: test_vcvt_f16_f32:
; FAST: // %bb.0:
; FAST-NEXT: mov.16b v1, v0
; FAST-NEXT: // implicit-def: $q0
; FAST-NEXT: mov.16b v0, v1
; FAST-NEXT: // kill: def $s0 killed $s0 killed $q0
; FAST-NEXT: fcvt h0, s0
; FAST-NEXT: ret
;
; GISEL-LABEL: test_vcvt_f16_f32:
; GISEL: // %bb.0:
; GISEL-NEXT: fmov x8, d0
; GISEL-NEXT: fmov s0, w8
; GISEL-NEXT: fcvt h0, s0
; GISEL-NEXT: ret
%tmp = fptrunc <1 x float> %x to <1 x half>
%elt = extractelement <1 x half> %tmp, i32 0
ret half %elt
}
; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f32_f64)
; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f32_f64)
define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {