mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[AArch64] Don't blindly lower f16/f128 FCCMPs.
Instead, extend f16 operands to f32 (as we do when lowering a standalone SETCC), and let f128 comparisons be legalized to runtime-library (RT) calls. Fixes PR26803. llvm-svn: 263301
This commit is contained in:
parent
99affb0978
commit
a2af74a580
@ -1216,8 +1216,14 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
|
||||
SDLoc dl, SelectionDAG &DAG) {
|
||||
EVT VT = LHS.getValueType();
|
||||
|
||||
if (VT.isFloatingPoint())
|
||||
if (VT.isFloatingPoint()) {
|
||||
assert(VT != MVT::f128);
|
||||
if (VT == MVT::f16) {
|
||||
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
|
||||
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
|
||||
}
|
||||
return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
|
||||
}
|
||||
|
||||
// The CMP instruction is just an alias for SUBS, and representing it as
|
||||
// SUBS means that it's possible to get CSE with subtract operations.
|
||||
@ -1301,9 +1307,14 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
|
||||
AArch64CC::CondCode OutCC,
|
||||
SDLoc DL, SelectionDAG &DAG) {
|
||||
unsigned Opcode = 0;
|
||||
if (LHS.getValueType().isFloatingPoint())
|
||||
if (LHS.getValueType().isFloatingPoint()) {
|
||||
assert(LHS.getValueType() != MVT::f128);
|
||||
if (LHS.getValueType() == MVT::f16) {
|
||||
LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
|
||||
RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
|
||||
}
|
||||
Opcode = AArch64ISD::FCCMP;
|
||||
else if (RHS.getOpcode() == ISD::SUB) {
|
||||
} else if (RHS.getOpcode() == ISD::SUB) {
|
||||
SDValue SubOp0 = RHS.getOperand(0);
|
||||
if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
|
||||
// See emitComparison() on why we can only do this for SETEQ and SETNE.
|
||||
@ -1333,6 +1344,8 @@ static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate,
|
||||
return false;
|
||||
unsigned Opcode = Val->getOpcode();
|
||||
if (Opcode == ISD::SETCC) {
|
||||
if (Val->getOperand(0).getValueType() == MVT::f128)
|
||||
return false;
|
||||
CanNegate = true;
|
||||
return true;
|
||||
}
|
||||
|
@ -658,4 +658,56 @@ define i32 @select_or_olt_ueq_ogt(double %v0, double %v1, double %v2, double %v3
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; Verify that we correctly promote f16.
|
||||
|
||||
; CHECK-LABEL: half_select_and_olt_oge:
|
||||
; CHECK-LABEL: ; BB#0:
|
||||
; CHECK-DAG: fcvt [[S0:s[0-9]+]], h0
|
||||
; CHECK-DAG: fcvt [[S1:s[0-9]+]], h1
|
||||
; CHECK-NEXT: fcmp [[S0]], [[S1]]
|
||||
; CHECK-DAG: fcvt [[S2:s[0-9]+]], h2
|
||||
; CHECK-DAG: fcvt [[S3:s[0-9]+]], h3
|
||||
; CHECK-NEXT: fccmp [[S2]], [[S3]], #8, mi
|
||||
; CHECK-NEXT: csel w0, w0, w1, ge
|
||||
; CHECK-NEXT: ret
|
||||
; Returns %a when both (%v0 olt %v1) and (%v2 oge %v3) hold for the half
; operands, otherwise %b. The two compares are combined with a single 'and'
; before feeding the select.
define i32 @half_select_and_olt_oge(half %v0, half %v1, half %v2, half %v3, i32 %a, i32 %b) #0 {
|
||||
%c0 = fcmp olt half %v0, %v1
|
||||
%c1 = fcmp oge half %v2, %v3
|
||||
%cr = and i1 %c1, %c0
|
||||
%sel = select i1 %cr, i32 %a, i32 %b
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: half_select_and_olt_one:
|
||||
; CHECK-LABEL: ; BB#0:
|
||||
; CHECK-DAG: fcvt [[S0:s[0-9]+]], h0
|
||||
; CHECK-DAG: fcvt [[S1:s[0-9]+]], h1
|
||||
; CHECK-NEXT: fcmp [[S0]], [[S1]]
|
||||
; CHECK-DAG: fcvt [[S2:s[0-9]+]], h2
|
||||
; CHECK-DAG: fcvt [[S3:s[0-9]+]], h3
|
||||
; CHECK-NEXT: fccmp [[S2]], [[S3]], #4, mi
|
||||
; CHECK-NEXT: fccmp [[S2]], [[S3]], #1, ne
|
||||
; CHECK-NEXT: csel w0, w0, w1, vc
|
||||
; CHECK-NEXT: ret
|
||||
; Returns %a when both (%v0 olt %v1) and (%v2 one %v3) hold for the half
; operands, otherwise %b. The two compares are combined with a single 'and'
; before feeding the select.
define i32 @half_select_and_olt_one(half %v0, half %v1, half %v2, half %v3, i32 %a, i32 %b) #0 {
|
||||
%c0 = fcmp olt half %v0, %v1
|
||||
%c1 = fcmp one half %v2, %v3
|
||||
%cr = and i1 %c1, %c0
|
||||
%sel = select i1 %cr, i32 %a, i32 %b
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; Also verify that we don't try to generate f128 FCCMPs, using RT calls instead.
|
||||
|
||||
; CHECK-LABEL: f128_select_and_olt_oge:
|
||||
; CHECK: bl ___lttf2
|
||||
; CHECK: bl ___getf2
|
||||
; Returns %a when both (%v0 olt %v1) and (%v2 oge %v3) hold for the fp128
; operands, otherwise %b. The two compares are combined with a single 'and'
; before feeding the select.
define i32 @f128_select_and_olt_oge(fp128 %v0, fp128 %v1, fp128 %v2, fp128 %v3, i32 %a, i32 %b) #0 {
|
||||
%c0 = fcmp olt fp128 %v0, %v1
|
||||
%c1 = fcmp oge fp128 %v2, %v3
|
||||
%cr = and i1 %c1, %c0
|
||||
%sel = select i1 %cr, i32 %a, i32 %b
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
Loading…
x
Reference in New Issue
Block a user